Skip to content

Commit 6701c45

Browse files
committed
add configmap to run periodic opt
1 parent 3e57576 commit 6701c45

File tree

3 files changed

+106
-13
lines changed

3 files changed

+106
-13
lines changed

README.md

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,12 @@ Make sure you have the proper permission to the registry if the above commands d
5656
make install
5757
```
5858

59+
**Install the configmap to run optimizer loop:**
60+
61+
```sh
62+
kubectl apply -f deploy/ticker-configmap.yaml
63+
```
64+
5965
**Deploy the Manager to the cluster with the image specified by `IMG`:**
6066

6167
```sh
@@ -76,7 +82,7 @@ kubectl apply -k samples/input_sample.yaml
7682
**Delete the instances (CRs) from the cluster:**
7783

7884
```sh
79-
kubectl delete -k config/samples/
85+
kubectl delete -f samples/input_sample.yaml
8086
```
8187

8288
**Delete the APIs (CRDs) from the cluster:**

config/rbac/role.yaml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,13 @@ kind: ClusterRole
44
metadata:
55
name: manager-role
66
rules:
7+
- apiGroups:
8+
- ""
9+
resources:
10+
- configmaps
11+
verbs:
12+
- get
13+
- update
714
- apiGroups:
815
- ""
916
resources:

internal/controller/optimizer_controller.go

Lines changed: 92 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ package controller
1818

1919
import (
2020
"context"
21+
"sync"
2122
"time"
2223

2324
"k8s.io/apimachinery/pkg/runtime"
@@ -37,6 +38,10 @@ import (
3738
type OptimizerReconciler struct {
3839
client.Client
3940
Scheme *runtime.Scheme
41+
42+
// mu is held by watchAndRunLoop while it replaces ticker and stopTicker.
mu sync.Mutex
43+
// ticker is the ticker driving periodic reconciles; watchAndRunLoop sets it
// to nil when GLOBAL_OPT_INTERVAL is empty.
ticker *time.Ticker
44+
// stopTicker is closed by watchAndRunLoop to make the goroutine serving the
// current ticker return.
stopTicker chan struct{}
4045
}
4146

4247
type AcceleratorModelInfo struct {
@@ -50,6 +55,12 @@ type AcceleratorModelInfo struct {
5055
// +kubebuilder:rbac:groups="",resources=nodes,verbs=get;list;watch;update;patch
5156
// +kubebuilder:rbac:groups="",resources=nodes/status,verbs=get;list;update;patch;watch
5257
// +kubebuilder:rbac:groups=apps,resources=deployments,verbs=get;list
58+
// +kubebuilder:rbac:groups="",resources=configmaps,verbs=get;update
59+
60+
// Location of the ConfigMap that watchAndRunLoop polls for the
// GLOBAL_OPT_INTERVAL and GLOBAL_OPT_TRIGGER keys.
const (
61+
// configMapName is the name of the optimizer ConfigMap.
configMapName = "inferno-optimizer-config"
62+
// configMapNamespace is the namespace the ConfigMap is read from.
// NOTE(review): hard-coded to "default"; confirm this matches where
// deploy/ticker-configmap.yaml is applied.
configMapNamespace = "default"
63+
)
5364

5465
func (r *OptimizerReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
5566
logger := logf.FromContext(ctx)
@@ -144,21 +155,90 @@ func (r *OptimizerReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
144155

145156
// SetupWithManager sets up the controller with the Manager.
146157
func (r *OptimizerReconciler) SetupWithManager(mgr ctrl.Manager) error {
147-
go func() {
148-
ticker := time.NewTicker(5 * time.Minute)
149-
defer ticker.Stop()
150-
151-
for {
152-
<-ticker.C
153-
ctx := context.Background()
158+
// Start watching ConfigMap and ticker logic
159+
go r.watchAndRunLoop()
154160

155-
if _, err := r.Reconcile(ctx, ctrl.Request{}); err != nil {
156-
log.Log.Error(err, "Periodic reconcile failed")
157-
}
158-
}
159-
}()
160161
return ctrl.NewControllerManagedBy(mgr).
161162
For(&llmdOptv1alpha1.Optimizer{}).
162163
Named("optimizer").
163164
Complete(r)
164165
}
166+
167+
func (r *OptimizerReconciler) watchAndRunLoop() {
168+
var lastInterval string
169+
170+
for {
171+
cm := &corev1.ConfigMap{}
172+
err := r.Get(context.Background(), types.NamespacedName{
173+
Name: configMapName,
174+
Namespace: configMapNamespace,
175+
}, cm)
176+
if err != nil {
177+
log.Log.Error(err, "Unable to read optimization config")
178+
time.Sleep(30 * time.Second)
179+
continue
180+
}
181+
182+
interval := cm.Data["GLOBAL_OPT_INTERVAL"]
183+
trigger := cm.Data["GLOBAL_OPT_TRIGGER"]
184+
185+
// Handle manual trigger
186+
if trigger == "true" {
187+
log.Log.Info("Manual optimization trigger received")
188+
_, err := r.Reconcile(context.Background(), ctrl.Request{})
189+
if err != nil {
190+
log.Log.Error(err, "Manual reconcile failed")
191+
}
192+
193+
// Reset trigger in ConfigMap
194+
cm.Data["GLOBAL_OPT_TRIGGER"] = "false"
195+
if err := r.Update(context.Background(), cm); err != nil {
196+
log.Log.Error(err, "Failed to reset GLOBAL_OPT_TRIGGER")
197+
}
198+
}
199+
200+
r.mu.Lock()
201+
if interval != lastInterval {
202+
// Stop previous ticker if any
203+
if r.stopTicker != nil {
204+
close(r.stopTicker)
205+
}
206+
207+
if interval != "" {
208+
d, err := time.ParseDuration(interval)
209+
if err != nil {
210+
log.Log.Error(err, "Invalid GLOBAL_OPT_INTERVAL")
211+
r.mu.Unlock()
212+
continue
213+
}
214+
215+
r.stopTicker = make(chan struct{})
216+
ticker := time.NewTicker(d)
217+
r.ticker = ticker
218+
219+
go func(stopCh <-chan struct{}, tick <-chan time.Time) {
220+
for {
221+
select {
222+
case <-tick:
223+
_, err := r.Reconcile(context.Background(), ctrl.Request{})
224+
if err != nil {
225+
log.Log.Error(err, "Manual reconcile failed")
226+
}
227+
case <-stopCh:
228+
return
229+
}
230+
}
231+
}(r.stopTicker, ticker.C)
232+
233+
log.Log.Info("Started periodic optimization ticker", "interval", interval)
234+
} else {
235+
r.ticker = nil
236+
log.Log.Info("GLOBAL_OPT_INTERVAL unset, disabling periodic optimization")
237+
}
238+
lastInterval = interval
239+
}
240+
r.mu.Unlock()
241+
242+
time.Sleep(10 * time.Second)
243+
}
244+
}

0 commit comments

Comments
 (0)