Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions api/core/v1alpha1/model_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,11 @@ const (
// Once either of them qualified, we'll expose this as a field in Model.
ModelPreheatAnnoKey = "llmaz.io/model-preheat"

// ModelActivatorAnnoKey is used to indicate whether the model is activated by the activator.
ModelActivatorAnnoKey = "activator.llmaz.io/playground"
// CachedModelActivatorAnnoKey is used to cache the activator info of the model.
CachedModelActivatorAnnoKey = "cached.activator.llmaz.io"

HUGGING_FACE = "Huggingface"
MODEL_SCOPE = "ModelScope"

Expand Down
4 changes: 4 additions & 0 deletions chart/templates/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,10 @@ spec:
env:
- name: KUBERNETES_CLUSTER_DOMAIN
value: {{ quote .Values.kubernetesClusterDomain }}
- name: POD_IP
valueFrom:
fieldRef:
fieldPath: status.podIP
image: {{ .Values.controllerManager.manager.image.repository }}:{{ .Values.controllerManager.manager.image.tag
| default .Chart.AppVersion }}
livenessProbe:
Expand Down
2 changes: 2 additions & 0 deletions chart/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ controllerManager:
- --metrics-bind-address=:8443
- --leader-elect
- --namespace=llmaz-system
- --enable-serverless
- --pod-ip=$(POD_IP)
containerSecurityContext:
allowPrivilegeEscalation: false
capabilities:
Expand Down
23 changes: 21 additions & 2 deletions cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (

"k8s.io/apimachinery/pkg/runtime"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
"k8s.io/client-go/dynamic"
clientgoscheme "k8s.io/client-go/kubernetes/scheme"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/healthz"
Expand Down Expand Up @@ -63,10 +64,14 @@ func main() {
var enableLeaderElection bool
var probeAddr string
var namespace string
var enableServerless bool
var podIP string

flag.StringVar(&metricsAddr, "metrics-bind-address", ":8080", "The address the metric endpoint binds to.")
flag.StringVar(&probeAddr, "health-probe-bind-address", ":8081", "The address the probe endpoint binds to.")
flag.StringVar(&namespace, "namespace", "llmaz-system", "The namespace of the llmaz to deploy")
flag.BoolVar(&enableServerless, "enable-serverless", false, "Enable the serverless feature")
flag.StringVar(&podIP, "pod-ip", "", "The pod IP of the llmaz controller manager")
flag.BoolVar(&enableLeaderElection, "leader-elect", false,
"Enable leader election for controller manager. "+
"Enabling this will ensure there is only one active controller manager.")
Expand Down Expand Up @@ -120,7 +125,7 @@ func main() {
// Cert won't be ready until manager starts, so start a goroutine here which
// will block until the cert is ready before setting up the controllers.
// Controllers that register after the manager starts will start directly.
go setupControllers(mgr, certsReady)
go setupControllers(mgr, certsReady, enableServerless, podIP)

//+kubebuilder:scaffold:builder

Expand All @@ -140,7 +145,7 @@ func main() {
}
}

func setupControllers(mgr ctrl.Manager, certsReady chan struct{}) {
func setupControllers(mgr ctrl.Manager, certsReady chan struct{}, enableServerless bool, podIP string) {
// The controllers won't work until the webhooks are operating,
// and the webhooks won't work until the certs are all in place.
setupLog.Info("waiting for the cert generation to complete")
Expand Down Expand Up @@ -176,6 +181,20 @@ func setupControllers(mgr ctrl.Manager, certsReady chan struct{}) {
os.Exit(1)
}

if enableServerless {
dynamicClient, err := dynamic.NewForConfig(mgr.GetConfig())
if err != nil {
setupLog.Error(err, "unable to create dynamic client")
os.Exit(1)
}

activatorReconciler := inferencecontroller.NewActivatorReconciler(mgr, dynamicClient, podIP)
if err := activatorReconciler.SetupWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "Activator")
os.Exit(1)
}
}

if os.Getenv("ENABLE_WEBHOOKS") != "false" {
if err := webhook.SetupOpenModelWebhook(mgr); err != nil {
setupLog.Error(err, "unable to create webhook", "webhook", "Model")
Expand Down
10 changes: 10 additions & 0 deletions config/rbac/role.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,16 @@ rules:
verbs:
- get
- list
- apiGroups:
- ""
resources:
- endpoints
verbs:
- get
- list
- patch
- update
- watch
- apiGroups:
- ""
resources:
Expand Down
Loading
Loading