Commit b839e02

Add GCP support (#144)
This pull request updates the `Makefile`, Go source files, Helm manifests, and documentation to add GCP support. The most important changes are grouped by theme below.

### Makefile Enhancements

* Added a new `image-clean` target to remove Docker images.
* Updated the `clean` target to also remove the binaries listed in `CONTRIB_BINARIES`.

### Go Source File Updates

* Introduced the `ApplyPriorityLevelConfiguration` function in `cmd/kperf/commands/utils/helper.go`, which applies a Kubernetes PriorityLevelConfiguration via `client-go`.
* Updated `cmd/kperf/commands/virtualcluster/nodepool.go` to call the new `ApplyPriorityLevelConfiguration` function.

### Documentation Improvements

* Added instructions for obtaining a KubeConfig on Azure, AWS, and GCP to `docs/getting-started.md`.
* Updated the runner group specification and example commands in `docs/getting-started.md` to reflect current configurations and image versions.
* Revised the benchmark scenario descriptions and options in `docs/runkperf.md` to reflect updated configurations and image versions.

### Helm Chart Updates

* Added Helm ownership labels and annotations to the `FlowSchema` templates in `manifests/runnergroup/server/templates/flowcontrol.yaml` and `manifests/virtualcluster/nodecontrollers/templates/flowcontrol.yaml`.
1 parent ee26b10 commit b839e02

9 files changed: +112 additions, −21 deletions


Makefile

Lines changed: 5 additions & 0 deletions
@@ -44,6 +44,10 @@ image-push: image-build ## push image
 	@echo pushing ${IMAGE_NAME}
 	@docker push ${IMAGE_NAME}
 
+image-clean: ## clean image
+	@echo cleaning ${IMAGE_NAME}
+	@docker rmi ${IMAGE_NAME}
+
 test: ## run test
 	@go test -v ./...
 
@@ -53,6 +57,7 @@ lint: ## run lint
 .PHONY: clean
 clean: ## clean up binaries
 	@rm -f $(BINARIES)
+	@rm -f $(CONTRIB_BINARIES)
 
 .PHONY: help
 help: ## this help

cmd/kperf/commands/utils/helper.go

Lines changed: 55 additions & 0 deletions
@@ -4,11 +4,16 @@
 package utils
 
 import (
+	"context"
 	"fmt"
 	"os"
 	"path/filepath"
 	"strings"
 
+	flowcontrolv1beta3 "k8s.io/api/flowcontrol/v1beta3"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/client-go/kubernetes"
+	"k8s.io/client-go/tools/clientcmd"
 	"k8s.io/client-go/util/homedir"
 )
 
@@ -60,3 +65,53 @@ func inCluster() bool {
 	return os.Getenv("KUBERNETES_SERVICE_HOST") != "" &&
 		os.Getenv("KUBERNETES_SERVICE_PORT") != ""
 }
+
+// ApplyPriorityLevelConfiguration applies the "custom-system"
+// PriorityLevelConfiguration to the cluster using client-go.
+func ApplyPriorityLevelConfiguration(kubeconfigPath string) error {
+	// Load the kubeconfig file
+	config, err := clientcmd.BuildConfigFromFlags("", kubeconfigPath)
+	if err != nil {
+		return fmt.Errorf("failed to load kubeconfig: %v", err)
+	}
+
+	// Create a Kubernetes client
+	clientset, err := kubernetes.NewForConfig(config)
+	if err != nil {
+		return fmt.Errorf("failed to create Kubernetes client: %v", err)
+	}
+
+	// Define the PriorityLevelConfiguration
+	lendablePercent := int32(30)
+	plc := &flowcontrolv1beta3.PriorityLevelConfiguration{
+		TypeMeta: metav1.TypeMeta{
+			APIVersion: "flowcontrol.apiserver.k8s.io/v1beta3",
+			Kind:       "PriorityLevelConfiguration",
+		},
+		ObjectMeta: metav1.ObjectMeta{
+			Name: "custom-system",
+		},
+		Spec: flowcontrolv1beta3.PriorityLevelConfigurationSpec{
+			Type: flowcontrolv1beta3.PriorityLevelEnablementLimited,
+			Limited: &flowcontrolv1beta3.LimitedPriorityLevelConfiguration{
+				LendablePercent: &lendablePercent,
+				LimitResponse: flowcontrolv1beta3.LimitResponse{
+					Type: flowcontrolv1beta3.LimitResponseTypeQueue,
+					Queuing: &flowcontrolv1beta3.QueuingConfiguration{
+						Queues:           64,
+						HandSize:         6,
+						QueueLengthLimit: 50,
+					},
+				},
+			},
+		},
+	}
+
+	// Apply the PriorityLevelConfiguration
+	_, err = clientset.FlowcontrolV1beta3().PriorityLevelConfigurations().Create(context.TODO(), plc, metav1.CreateOptions{})
+	if err != nil {
+		return fmt.Errorf("failed to apply PriorityLevelConfiguration: %v", err)
+	}
+
+	fmt.Printf("Successfully applied PriorityLevelConfiguration: %s\n", plc.Name)
+	return nil
+}
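One behavioral note on the new helper: it creates the object with `Create`, so a second `kperf vc nodepool add` run against the same cluster would fail with an `AlreadyExists` error, and the `%v` wrapping above discards the typed API error a caller could inspect. Below is a minimal sketch of a re-run-tolerant variant, assuming it is acceptable to keep a pre-existing `custom-system` object untouched; `createOrSkipPLC` is a hypothetical helper, not part of this commit.

```go
// Sketch only (not part of this commit): an idempotent variant of the
// Create call above. The AlreadyExists check must run on the raw
// client-go error, before any fmt.Errorf("%v", ...) wrapping, which
// would discard the typed status error.
package utils

import (
	"context"
	"fmt"

	flowcontrolv1beta3 "k8s.io/api/flowcontrol/v1beta3"
	apierrors "k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
)

func createOrSkipPLC(clientset kubernetes.Interface, plc *flowcontrolv1beta3.PriorityLevelConfiguration) error {
	_, err := clientset.FlowcontrolV1beta3().PriorityLevelConfigurations().Create(context.TODO(), plc, metav1.CreateOptions{})
	if apierrors.IsAlreadyExists(err) {
		// A previous run already installed "custom-system"; keep it.
		return nil
	}
	if err != nil {
		return fmt.Errorf("failed to apply PriorityLevelConfiguration: %w", err)
	}
	return nil
}
```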

cmd/kperf/commands/virtualcluster/nodepool.go

Lines changed: 5 additions & 0 deletions
@@ -84,6 +84,11 @@ var nodepoolAddCommand = cli.Command{
 
 		kubeCfgPath := cliCtx.GlobalString("kubeconfig")
 
+		err := utils.ApplyPriorityLevelConfiguration(kubeCfgPath)
+		if err != nil {
+			return fmt.Errorf("failed to apply priority level configuration: %w", err)
+		}
+
 		affinityLabels, err := utils.KeyValuesMap(cliCtx.StringSlice("affinity"))
 		if err != nil {
 			return fmt.Errorf("failed to parse affinity: %w", err)
contrib/cmd/runkperf/commands/bench/root.go

Lines changed: 2 additions & 2 deletions
@@ -37,12 +37,12 @@ var Command = cli.Command{
 		cli.StringFlag{
 			Name:  "vc-affinity",
 			Usage: "Deploy virtualnode's controller with a specific labels (FORMAT: KEY=VALUE[,VALUE])",
-			Value: "node.kubernetes.io/instance-type=Standard_D8s_v3,m4.2xlarge",
+			Value: "node.kubernetes.io/instance-type=Standard_D8s_v3,m4.2xlarge,n1-standard-8",
 		},
 		cli.StringFlag{
 			Name:  "rg-affinity",
 			Usage: "Deploy runner group with a specific labels (FORMAT: KEY=VALUE[,VALUE])",
-			Value: "node.kubernetes.io/instance-type=Standard_D16s_v3,m4.4xlarge",
+			Value: "node.kubernetes.io/instance-type=Standard_D16s_v3,m4.4xlarge,n1-standard-16",
 		},
 		cli.BoolFlag{
 			Name: "eks",

contrib/cmd/runkperf/commands/warmup/command.go

Lines changed: 2 additions & 2 deletions
@@ -59,12 +59,12 @@ var Command = cli.Command{
 		cli.StringFlag{
 			Name:  "vc-affinity",
 			Usage: "Deploy virtualnode's controller with a specific labels (FORMAT: KEY=VALUE[,VALUE])",
-			Value: "node.kubernetes.io/instance-type=Standard_D8s_v3,m4.2xlarge",
+			Value: "node.kubernetes.io/instance-type=Standard_D8s_v3,m4.2xlarge,n1-standard-8",
 		},
 		cli.StringFlag{
 			Name:  "rg-affinity",
 			Usage: "Deploy runner group with a specific labels (FORMAT: KEY=VALUE[,VALUE])",
-			Value: "node.kubernetes.io/instance-type=Standard_D16s_v3,m4.4xlarge",
+			Value: "node.kubernetes.io/instance-type=Standard_D16s_v3,m4.4xlarge,n1-standard-16",
 		},
 		cli.BoolFlag{
 			Name: "eks",

docs/getting-started.md

Lines changed: 20 additions & 4 deletions
@@ -44,6 +44,22 @@ sudo make install
 By default, the binaries will be in `/usr/local/bin`. The install prefix can be
 changed by passing the `PREFIX` variable (default: `/usr/local`).
 
+## Getting KubeConfig
+### Azure
+```bash
+az aks get-credentials --location <REGION> --resource-group <RESOURCE_GROUP> --name <CLUSTER_NAME> --overwrite-existing
+```
+
+### AWS
+```bash
+eksctl utils write-kubeconfig --cluster=<CLUSTER_NAME> --region=<REGION>
+```
+
+### GCP
+```bash
+gcloud container clusters get-credentials <CLUSTER_NAME> --region <REGION>
+```
+
 ## Using kperf
 
 ### kperf-runner run
@@ -219,7 +235,7 @@ If you want to run benchmark in Kubernetes cluster, please use `kperf runnergroup`
 
 ### kperf-runnergroup
 
-The `kperf runnergroup` command manages a group of runners within a target Kubernetes cluster. 
+The `kperf runnergroup` command manages a group of runners within a target Kubernetes cluster.
 A runner group consists of multiple runners, with each runner deployed as an individual Pod for the `kperf runner` process.
 These runners not only generate requests within the cluster but can also issue requests from multiple endpoints,
 mitigating limitations such as network bandwidth constraints.
@@ -274,14 +290,14 @@ loadProfile:
 # nodeAffinity defines how to deploy runners into dedicated nodes which have specific labels.
 nodeAffinity:
   node.kubernetes.io/instance-type:
-    - Standard_DS2_v2
+    - n1-standard-16
 ```
 
 Let's say the local file `/tmp/example-runnergroup-spec.yaml`. You can run:
 
 ```bash
 $ kperf rg run \
-    --runner-image=telescope.azurecr.io/oss/kperf:v0.1.5 \
+    --runner-image=ghcr.io/azure/kperf:0.1.8 \
     --runnergroup="file:///tmp/example-runnergroup-spec.yaml"
 ```
 
@@ -446,7 +462,7 @@ You can use the following command to add nodepool named by `example` with 10 nodes
 ```bash
 $ kperf vc nodepool add example \
     --nodes=10 --cpu=32 --memory=96 --max-pods=50 \
-    --affinity="node.kubernetes.io/instance-type=Standard_DS2_v2"
+    --affinity="node.kubernetes.io/instance-type=n1-standard-16"
 ```
 
 > NOTE: The `--affinity` is used to deploy node controller (kwok) to nodes with the specific labels.
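One caveat on the GCP instruction added above: `gcloud container clusters get-credentials` accepts `--region` only for regional clusters; for zonal clusters, pass `--zone <ZONE>` instead.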

docs/runkperf.md

Lines changed: 12 additions & 12 deletions
@@ -13,17 +13,17 @@ runkperf includes three benchmark scenarios, one of which focuses on measuring
 performance and stability with 3,000 short-lifecycle pods distributed across 100 nodes.
 
 ```bash
-$ runkperf bench --runner-image telescope.azurect.io/oss/kperf:v0.1.5 node100_job1_pod3k --help
+$ runkperf bench --runner-image ghcr.io/azure/kperf:0.1.8 node10_job1_pod100 --help
 
 NAME:
-   runkperf bench node100_job1_pod3k -
+   runkperf bench node10_job1_pod100 -
 
-The test suite is to setup 100 virtual nodes and deploy one job with 3k pods on
+The test suite is to setup 10 virtual nodes and deploy one job with 100 pods on
 that nodes. It repeats to create and delete job. The load profile is fixed.
 
 
 USAGE:
-   runkperf bench node100_job1_pod3k [command options] [arguments...]
+   runkperf bench node10_job1_pod100 [command options] [arguments...]
 
 OPTIONS:
   --total value  Total requests per runner (There are 10 runners totally and runner's rate is 10) (default: 36000)
@@ -33,28 +33,28 @@ OPTIONS:
   --content-type value  Content type (json or protobuf) (default: "json")
 ```
 
-This test eliminates the need to set up 100 physical nodes, as kperf leverages
+This test eliminates the need to set up many physical nodes, as kperf leverages
 [kwok](https://github.com/kubernetes-sigs/kwok) to simulate both nodes and pod
-lifecycles. Only a few physical nodes are required to host **5** kperf runners
-and **100** kwok controllers.
+lifecycles. Only a few physical nodes are required to run large scale benchmark
+with **5** kperf runners and **100** kwok controllers.
 
 We **recommend** using two separate node pools in the target Kubernetes cluster
 to host the kperf runners and Kwok controllers independently. By default, runkperf
 schedules:
 
-* Runners on nodes with instance type: **Standard_D16s_v3** on Azure or **m4.4xlarge** on AWS
-* kwok controllers on nodes with instance type: **Standard_D8s_v3** on Azure or **m4.2xlarge** on AWS
+* Runners on nodes with instance type: **Standard_D16s_v3** on Azure or **m4.4xlarge** on AWS or **n1-standard-16** on GCP
+* kwok controllers on nodes with instance type: **Standard_D8s_v3** on Azure or **m4.2xlarge** on AWS or **n1-standard-8** on GCP
 
-You can modify the scheduling affinity for runners and controllers using the 
+You can modify the scheduling affinity for runners and controllers using the
 `--rg-affinity` and `--vc-affinity` options. Please check `runkperf bench --help` for more details.
 
 When that target cluster is ready, you can run
 
 ```bash
 $ sudo runkperf -v 3 bench \
     --kubeconfig $HOME/.kube/config \
-    --runner-image telescope.azurecr.io/oss/kperf:v0.1.5 \
-    node100_job1_pod3k --total 1000
+    --runner-image ghcr.io/azure/kperf:0.1.8 \
+    node10_job1_pod100 --total 1000
 ```
 
 > NOTE: The `sudo` allows that command to create [mount_namespaces(7)](https://man7.org/linux/man-pages/man7/mount_namespaces.7.html)

manifests/runnergroup/server/templates/flowcontrol.yaml

Lines changed: 5 additions & 0 deletions
@@ -3,6 +3,11 @@ kind: FlowSchema
 metadata:
   name: {{ .Values.name }}
   namespace: {{ .Release.Namespace }}
+  labels:
+    app.kubernetes.io/managed-by: "Helm"
+  annotations:
+    meta.helm.sh/release-name: "{{ .Release.Name }}"
+    meta.helm.sh/release-namespace: "{{ .Release.Namespace }}"
 spec:
   distinguisherMethod:
     type: ByUser

manifests/virtualcluster/nodecontrollers/templates/flowcontrol.yaml

Lines changed: 6 additions & 1 deletion
@@ -3,12 +3,17 @@ kind: FlowSchema
 metadata:
   name: {{ .Values.name }}
   namespace: {{ .Release.Namespace }}
+  labels:
+    app.kubernetes.io/managed-by: "Helm"
+  annotations:
+    meta.helm.sh/release-name: "{{ .Release.Name }}"
+    meta.helm.sh/release-namespace: "{{ .Release.Namespace }}"
 spec:
   distinguisherMethod:
     type: ByUser
   matchingPrecedence: 500
   priorityLevelConfiguration:
-    name: system
+    name: custom-system
   rules:
   - resourceRules:
     - apiGroups:
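Two details worth calling out in these template changes: the `app.kubernetes.io/managed-by: Helm` label together with the `meta.helm.sh/release-name` and `meta.helm.sh/release-namespace` annotations are what Helm checks when adopting resources that already exist in the cluster, avoiding ownership conflicts on install or upgrade; and switching `priorityLevelConfiguration.name` from the built-in `system` level to `custom-system` points this `FlowSchema` at the object created by the new `ApplyPriorityLevelConfiguration` helper.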
