Skip to content

Commit 64cb6dc

Browse files
authored
Merge branch 'main' into bbr-docs
2 parents 5ddaf28 + 0a28208 commit 64cb6dc

File tree

110 files changed

+4415
-2844
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

110 files changed

+4415
-2844
lines changed

.github/workflows/kal.yml

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,5 @@ jobs:
2020
persist-credentials: false
2121
- name: Set up Go
2222
uses: actions/setup-go@d35c59abb061a4a6fb18e82ac0862c26744d6ab5 # tag=v5.5.0
23-
- name: Install Golang CI Lint
24-
run: go install github.com/golangci/golangci-lint/v2/cmd/[email protected]
25-
- name: Build KAL
26-
run: golangci-lint custom
27-
- name: run api linter
28-
run: ./bin/golangci-kube-api-linter run -c ./.golangci-kal.yml ./...
23+
- name: Run API Linter
24+
run: make api-lint
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
name: Label non-main PRs
2+
3+
on:
4+
pull_request:
5+
types: [opened, edited, synchronize, reopened]
6+
7+
jobs:
8+
add-label:
9+
runs-on: ubuntu-latest
10+
steps:
11+
- name: Add labels when base branch is not main
12+
if: github.event.pull_request.base.ref != 'main'
13+
uses: actions-ecosystem/action-add-labels@v1
14+
with:
15+
github_token: ${{ secrets.GITHUB_TOKEN }}
16+
labels: |
17+
do-not-merge/hold
18+
do-not-merge/cherry-pick-not-approved

Makefile

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -160,8 +160,12 @@ lint-fix: golangci-lint ## Run golangci-lint linter and perform fixes
160160
ci-lint: golangci-lint
161161
$(GOLANGCI_LINT) run --timeout 15m0s
162162

163+
.PHONY: api-lint
164+
api-lint: golangci-api-lint
165+
$(GOLANGCI_API_LINT) run -c .golangci-kal.yml --timeout 15m0s ./...
166+
163167
.PHONY: verify
164-
verify: vet fmt-verify generate ci-lint verify-all
168+
verify: vet fmt-verify generate ci-lint api-lint verify-all
165169
git --no-pager diff --exit-code config api client-go
166170

167171
.PHONY: verify-crds
@@ -366,6 +370,7 @@ CONTROLLER_GEN ?= $(LOCALBIN)/controller-gen
366370
ENVTEST ?= $(LOCALBIN)/setup-envtest
367371
CRD_REF_DOCS ?= $(LOCALBIN)/crd-ref-docs
368372
GOLANGCI_LINT = $(LOCALBIN)/golangci-lint
373+
GOLANGCI_API_LINT = $(LOCALBIN)/golangci-kube-api-linter
369374
HELM = $(PROJECT_DIR)/bin/helm
370375
YQ = $(PROJECT_DIR)/bin/yq
371376
KUBECTL_VALIDATE = $(PROJECT_DIR)/bin/kubectl-validate
@@ -407,6 +412,11 @@ golangci-lint: $(GOLANGCI_LINT) ## Download golangci-lint locally if necessary.
407412
$(GOLANGCI_LINT): $(LOCALBIN)
408413
$(call go-install-tool,$(GOLANGCI_LINT),github.com/golangci/golangci-lint/v2/cmd/golangci-lint,$(GOLANGCI_LINT_VERSION))
409414

415+
.PHONY: golangci-api-lint
416+
golangci-api-lint: golangci-lint $(GOLANGCI_API_LINT) ## Download golangci-lint locally if necessary before building KAL
417+
$(GOLANGCI_API_LINT):
418+
$(GOLANGCI_LINT) custom
419+
410420
.PHONY: yq
411421
yq: ## Download yq locally if necessary.
412422
GOBIN=$(PROJECT_DIR)/bin GO111MODULE=on go install github.com/mikefarah/yq/v4@$(YQ_VERSION)

README.md

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,6 @@ This is achieved by leveraging Envoy's [External Processing] (ext-proc) to exten
1010

1111
[Inference Gateway]:#concepts-and-definitions
1212

13-
14-
> ***NOTE***: As we prep for our `v1` release, some of our docs may fall out of scope. We are working hard to get these up to date, and they will be ready by the time we launch `v1`. Thanks!
15-
1613
## New!
1714
Inference Gateway has partnered with vLLM to accelerate LLM serving optimizations with [llm-d](https://llm-d.ai/blog/llm-d-announce)!
1815

bbr.Dockerfile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ RUN go mod download
1818
COPY cmd/bbr ./cmd
1919
COPY pkg ./pkg
2020
COPY internal ./internal
21+
COPY api ./api
2122
WORKDIR /src/cmd
2223
RUN go build -o /bbr
2324

cmd/epp/runner/runner.go

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ var (
8080
enablePprof = flag.Bool("enable-pprof", runserver.DefaultEnablePprof, "Enables pprof handlers. Defaults to true. Set to false to disable pprof handlers.")
8181
poolName = flag.String("pool-name", runserver.DefaultPoolName, "Name of the InferencePool this Endpoint Picker is associated with.")
8282
poolGroup = flag.String("pool-group", runserver.DefaultPoolGroup, "group of the InferencePool this Endpoint Picker is associated with.")
83-
poolNamespace = flag.String("pool-namespace", runserver.DefaultPoolNamespace, "Namespace of the InferencePool this Endpoint Picker is associated with.")
83+
poolNamespace = flag.String("pool-namespace", "", "Namespace of the InferencePool this Endpoint Picker is associated with.")
8484
logVerbosity = flag.Int("v", logging.DEFAULT, "number for the log level verbosity")
8585
secureServing = flag.Bool("secure-serving", runserver.DefaultSecureServing, "Enables secure serving. Defaults to true.")
8686
healthChecking = flag.Bool("health-checking", runserver.DefaultHealthChecking, "Enables health checking")
@@ -188,9 +188,20 @@ func (r *Runner) Run(ctx context.Context) error {
188188
FilterProvider: filters.WithAuthenticationAndAuthorization,
189189
}
190190

191+
// Determine pool namespace: if --pool-namespace is non-empty, use it; else NAMESPACE env var; else default
192+
resolvePoolNamespace := func() string {
193+
if *poolNamespace != "" {
194+
return *poolNamespace
195+
}
196+
if nsEnv := os.Getenv("NAMESPACE"); nsEnv != "" {
197+
return nsEnv
198+
}
199+
return runserver.DefaultPoolNamespace
200+
}
201+
resolvedPoolNamespace := resolvePoolNamespace()
191202
poolNamespacedName := types.NamespacedName{
192203
Name: *poolName,
193-
Namespace: *poolNamespace,
204+
Namespace: resolvedPoolNamespace,
194205
}
195206
poolGroupKind := schema.GroupKind{
196207
Group: *poolGroup,

config/charts/inferencepool/README.md

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,30 @@ Then apply it with:
117117
helm install vllm-llama3-8b-instruct ./config/charts/inferencepool -f values.yaml
118118
```
119119

120+
### Install with Monitoring
121+
122+
To enable metrics collection and monitoring for the EndpointPicker, you can configure Prometheus ServiceMonitor creation:
123+
124+
```yaml
125+
inferenceExtension:
126+
monitoring:
127+
interval: "10s"
128+
prometheus:
129+
enabled: true
130+
secret:
131+
name: inference-gateway-sa-metrics-reader-secret
132+
```
133+
134+
**Note:** Prometheus monitoring requires the Prometheus Operator and ServiceMonitor CRD to be installed in the cluster.
135+
136+
For GKE environments, monitoring is automatically configured when `provider.name` is set to `gke`.
137+
138+
Then apply it with:
139+
140+
```txt
141+
helm install vllm-llama3-8b-instruct ./config/charts/inferencepool -f values.yaml
142+
```
143+
120144
## Uninstall
121145

122146
Run the following command to uninstall the chart:
@@ -147,6 +171,9 @@ The following table list the configurable parameters of the chart.
147171
| `inferenceExtension.affinity` | Affinity for the endpoint picker. Defaults to `{}`. |
148172
| `inferenceExtension.tolerations` | Tolerations for the endpoint picker. Defaults to `[]`. |
149173
| `inferenceExtension.flags.has-enable-leader-election` | Enable leader election for high availability. When enabled, only one EPP pod (the leader) will be ready to serve traffic. |
174+
| `inferenceExtension.monitoring.interval` | Metrics scraping interval for monitoring. Defaults to `10s`. |
175+
| `inferenceExtension.monitoring.secret.name` | Name of the service account token secret for metrics authentication. Defaults to `inference-gateway-sa-metrics-reader-secret`. |
176+
| `inferenceExtension.monitoring.prometheus.enabled` | Enable Prometheus ServiceMonitor creation for EPP metrics collection. Defaults to `false`. |
150177
| `inferenceExtension.pluginsCustomConfig` | Custom config that is passed to EPP as inline yaml. |
151178
| `provider.name` | Name of the Inference Gateway implementation being used. Possible values: `gke`. Defaults to `none`. |
152179

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
{{- if or .Values.inferenceExtension.monitoring.prometheus.enabled .Values.inferenceExtension.monitoring.gke.enabled }}
2+
apiVersion: v1
3+
kind: Secret
4+
metadata:
5+
name: {{ .Values.inferenceExtension.monitoring.secret.name }}
6+
namespace: {{ .Release.Namespace }}
7+
labels:
8+
{{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
9+
annotations:
10+
kubernetes.io/service-account.name: {{ include "gateway-api-inference-extension.name" . }}
11+
type: kubernetes.io/service-account-token
12+
{{- end }}
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
{{- if .Values.inferenceExtension.monitoring.prometheus.enabled }}
2+
apiVersion: monitoring.coreos.com/v1
3+
kind: ServiceMonitor
4+
metadata:
5+
name: {{ include "gateway-api-inference-extension.name" . }}-monitor
6+
namespace: {{ .Release.Namespace }}
7+
labels:
8+
{{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
9+
spec:
10+
endpoints:
11+
- interval: {{ .Values.inferenceExtension.monitoring.interval }}
12+
port: "http-metrics"
13+
path: "/metrics"
14+
authorization:
15+
credentials:
16+
key: token
17+
name: {{ .Values.inferenceExtension.monitoring.secret.name }}
18+
jobLabel: {{ include "gateway-api-inference-extension.name" . }}
19+
namespaceSelector:
20+
matchNames:
21+
- {{ .Release.Namespace }}
22+
selector:
23+
matchLabels:
24+
{{- include "gateway-api-inference-extension.labels" . | nindent 6 }}
25+
{{- end }}

config/charts/inferencepool/templates/gke.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -46,15 +46,15 @@ spec:
4646
endpoints:
4747
- port: metrics
4848
scheme: http
49-
interval: 5s
49+
interval: {{ .Values.inferenceExtension.monitoring.interval }}
5050
path: /metrics
5151
authorization:
5252
type: Bearer
5353
credentials:
5454
secret:
55-
name: {{ .Values.gke.monitoringSecret.name }}
55+
name: {{ .Values.inferenceExtension.monitoring.secret.name }}
5656
key: token
57-
namespace: {{ .Values.gke.monitoringSecret.namespace }}
57+
namespace: {{ .Release.Namespace }}
5858
selector:
5959
matchLabels:
6060
{{- include "gateway-api-inference-extension.selectorLabels" . | nindent 8 }}

0 commit comments

Comments
 (0)