# ai-system-config/Makefile (repository m4xx3d0ut/ai-system-config, branch main)

# ---------- Config ----------
# Each knob is assigned with ?=, so a value supplied through the environment
# or on the make command line replaces the default shown here.

# Kubernetes namespace passed to the kubectl-based targets.
NS ?= ai

# Base URL of the gateway that the curl-based targets talk to.
GATEWAY ?= http://gateway.example.internal

# Value sent in the X-Admin-Token header of the /admin/* requests.
ADMIN_TOKEN ?= changeme

# Command used wherever kubectl is invoked; override it (for example
# KUBECTL=kc) if you prefer a wrapper command.
KUBECTL ?= kubectl

# ---- canonical model strings ----
# Full model identifiers, one variable per model. They are what ends up in
# MODEL and therefore in the "model" field of the gateway requests.
# NOTE(review): the "@tgi" suffix presumably selects the TGI backend for the
# same model -- confirm against the gateway.

# GLM edge chat models
GLM1P5B := local:zai-org/glm-edge-1.5b-chat
GLM4B := local:zai-org/glm-edge-4b-chat

# Qwen2.5 Coder
QWEN7B := local:Qwen/Qwen2.5-Coder-7B-Instruct
QWEN7B_TGI := local:Qwen/Qwen2.5-Coder-7B-Instruct@tgi
QWEN14B := local:Qwen/Qwen2.5-Coder-14B-Instruct
QWEN14B_TGI := local:Qwen/Qwen2.5-Coder-14B-Instruct@tgi

# StarCoder2
SC2_15B := local:bigcode/starcoder2-15b-instruct
SC2_15B_TGI := local:bigcode/starcoder2-15b-instruct@tgi

# DeepSeek Coder V2 Lite
DSC2_16B := local:deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct
DSC2_16B_TGI := local:deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct@tgi

# Pick a short alias and it is mapped to MODEL below.
# Accepted values: glm1p5b, glm4b, q7b, q7b@tgi, q14b, q14b@tgi, sc2, sc2@tgi, dsc2, dsc2@tgi
MODEL_SHORT ?= glm1p5b

# Map MODEL_SHORT -> MODEL.
# The aliases are mutually exclusive, so a single else-chain is enough.
# A MODEL given on the make command line still takes precedence over the
# assignment made here (command-line variables override makefile assignments).
ifeq ($(MODEL_SHORT),glm1p5b)
  MODEL := $(GLM1P5B)
else ifeq ($(MODEL_SHORT),glm4b)
  MODEL := $(GLM4B)
else ifeq ($(MODEL_SHORT),q7b)
  MODEL := $(QWEN7B)
else ifeq ($(MODEL_SHORT),q7b@tgi)
  MODEL := $(QWEN7B_TGI)
else ifeq ($(MODEL_SHORT),q14b)
  MODEL := $(QWEN14B)
else ifeq ($(MODEL_SHORT),q14b@tgi)
  MODEL := $(QWEN14B_TGI)
else ifeq ($(MODEL_SHORT),sc2)
  MODEL := $(SC2_15B)
else ifeq ($(MODEL_SHORT),sc2@tgi)
  MODEL := $(SC2_15B_TGI)
else ifeq ($(MODEL_SHORT),dsc2)
  MODEL := $(DSC2_16B)
else ifeq ($(MODEL_SHORT),dsc2@tgi)
  MODEL := $(DSC2_16B_TGI)
else ifeq ($(origin MODEL),undefined)
  # Unknown alias and no MODEL supplied any other way: say so now, instead of
  # leaving the user with only a later "MODEL is required" from a recipe.
  $(warning MODEL_SHORT='$(MODEL_SHORT)' is not a known alias (accepted: glm1p5b glm4b q7b q7b@tgi q14b q14b@tgi sc2 sc2@tgi dsc2 dsc2@tgi); MODEL is left unset)
endif

# Convenience wrappers: hotload-<alias>, unload-<alias> and chat-<alias>
# re-invoke make on the base target with MODEL_SHORT set to the pattern stem.
#
# .PHONY only accepts literal target names (a '%' there is not a pattern), so
# the wrappers depend on an always-out-of-date prerequisite instead. That way
# a stray file named e.g. "chat-q7b" cannot make the wrapper look up to date.
# The name starts with a dot so it can never become the default goal.
.PHONY: .FORCE
.FORCE:

hotload-%: .FORCE
	@$(MAKE) --no-print-directory hotload MODEL_SHORT=$*

unload-%: .FORCE
	@$(MAKE) --no-print-directory unload MODEL_SHORT=$*

# PROMPT is still honored; usage: make chat-q7b PROMPT='…'
# PROMPT is deliberately not re-passed on the sub-make command line: a PROMPT
# given on the command line (or in the environment) reaches the sub-make
# automatically, and when none is given the sub-make must build its own
# default so that the greeting names the model selected by the stem rather
# than the one this (parent) invocation resolved.
chat-%: .FORCE
	@$(MAKE) --no-print-directory chat MODEL_SHORT=$*

# Fetch $(GATEWAY)/healthz and pretty-print the response with jq.
# The response is captured first so that a curl failure (for example the
# gateway being unreachable) is reported (-S) and fails the target; in a plain
# "curl | jq" pipeline only jq's exit status would count.
.PHONY: health
health:
	@resp=$$(curl -sS $(GATEWAY)/healthz) && printf '%s\n' "$$resp" | jq .

# Fetch $(GATEWAY)/v1/models and pretty-print the response with jq.
# The response is captured first so that a curl failure is reported (-S) and
# fails the target; in a plain "curl | jq" pipeline only jq's exit status
# would count.
.PHONY: models
models:
	@resp=$$(curl -sS $(GATEWAY)/v1/models) && printf '%s\n' "$$resp" | jq .

# POST {"model": "$(MODEL)"} to $(GATEWAY)/admin/hotload, authenticated with
# the X-Admin-Token header, and pretty-print the response with jq.
# Requires MODEL to be non-empty.
# The response is captured before it is handed to jq: in a "curl | jq"
# pipeline the recipe's status is jq's, so curl --fail could never fail the
# target. With "&&" a failed request stops the recipe with curl's status.
.PHONY: hotload
hotload:
	@test -n "$(MODEL)" || (echo "MODEL is required"; exit 1)
	@resp=$$(curl -sS --fail -X POST $(GATEWAY)/admin/hotload \
	  -H "X-Admin-Token: $(ADMIN_TOKEN)" -H 'Content-Type: application/json' \
	  -d '{"model":"$(MODEL)"}') && printf '%s\n' "$$resp" | jq .

# POST {"model": "$(MODEL)"} to $(GATEWAY)/admin/unload, authenticated with
# the X-Admin-Token header, and pretty-print the response with jq.
# Requires MODEL to be non-empty.
# The response is captured before it is handed to jq: in a "curl | jq"
# pipeline the recipe's status is jq's, so curl --fail could never fail the
# target. With "&&" a failed request stops the recipe with curl's status.
.PHONY: unload
unload:
	@test -n "$(MODEL)" || (echo "MODEL is required"; exit 1)
	@resp=$$(curl -sS --fail -X POST $(GATEWAY)/admin/unload \
	  -H "X-Admin-Token: $(ADMIN_TOKEN)" -H 'Content-Type: application/json' \
	  -d '{"model":"$(MODEL)"}') && printf '%s\n' "$$resp" | jq .

# POST {"model": "$(MODEL)"} to $(GATEWAY)/admin/purge-cache, authenticated
# with the X-Admin-Token header, and pretty-print the response with jq.
# Requires MODEL to be non-empty.
# The response is captured before it is handed to jq: in a "curl | jq"
# pipeline the recipe's status is jq's, so curl --fail could never fail the
# target. With "&&" a failed request stops the recipe with curl's status.
.PHONY: purge
purge:
	@test -n "$(MODEL)" || (echo "MODEL is required"; exit 1)
	@resp=$$(curl -sS --fail -X POST $(GATEWAY)/admin/purge-cache \
	  -H "X-Admin-Token: $(ADMIN_TOKEN)" -H 'Content-Type: application/json' \
	  -d '{"model":"$(MODEL)"}') && printf '%s\n' "$$resp" | jq .

# Chat with a one-line prompt. Ex: make chat MODEL='$(QWEN14B_TGI)' PROMPT='Implement quicksort in Rust'
#
# Builds the request body with jq (model = MODEL, one user message = PROMPT,
# temperature 0.2, max_tokens 256), POSTs it to $(GATEWAY)/v1/chat/completions
# and pretty-prints the reply. Requires MODEL to be non-empty.
#
# MODEL and PROMPT are handed to jq as single-quoted shell words, with every
# embedded ' rewritten to '\'' by $(subst ...). Inside double quotes the shell
# would still interpret backticks, $(...) and " in the prompt text.
# Each stage is chained with && and the reply is captured before it is
# printed, so a failing jq or curl (--fail) fails the target; in one long
# pipeline only the last jq's exit status would count.
.PHONY: chat
PROMPT ?= Say hello from $(MODEL).
chat:
	@test -n "$(MODEL)" || (echo "MODEL is required"; exit 1)
	@body=$$(jq -n --arg m '$(subst ','\'',$(MODEL))' --arg p '$(subst ','\'',$(PROMPT))' \
	  '{model:$$m, messages:[{role:"user", content:$$p}], temperature:0.2, max_tokens:256}') && \
	  resp=$$(printf '%s' "$$body" | curl -sS --fail $(GATEWAY)/v1/chat/completions \
	    -H 'Content-Type: application/json' -d @-) && \
	  printf '%s\n' "$$resp" | jq .

# ---------- K8s quick look ----------
# These targets are commands, not files, so they are declared phony. The
# follow-* log tails are listed too; without that, a file named e.g.
# "follow-gw" would make the target look up to date and skip its recipe.
.PHONY: ns-info ns-events pods gw-logs vllm-logs tgi-logs svc-eps \
        follow-gw follow-vllm follow-tgi

# Show the namespace object itself, then a blank line, then the namespace's
# deployments, replica sets, pods and services, indented by two spaces.
# "|| true" keeps a failed namespace lookup from aborting the listing.
ns-info:
	@$(KUBECTL) get namespace $(NS) || true
	@printf '\n'
	@$(KUBECTL) --namespace $(NS) get deploy,rs,po,svc | sed 's/^/  /'

# List the namespace's events sorted by .lastTimestamp and keep only the
# last 100 lines of that listing.
ns-events:
	@$(KUBECTL) get events --namespace $(NS) --sort-by=.lastTimestamp | tail -n 100

# List the namespace's pods in kubectl's "wide" output format.
pods:
	@$(KUBECTL) get pods --namespace $(NS) --output wide

# List the namespace's services together with their endpoints objects.
svc-eps:
	@$(KUBECTL) get svc,endpoints --namespace $(NS)

# $(call recent_logs,<label selector>)
# Last 200 log lines from every container of every pod in $(NS) that matches
# the given label selector.
recent_logs = $(KUBECTL) -n $(NS) logs -l $(1) --all-containers=true --tail=200

# Gateway pods (selected by app.kubernetes.io/name=gateway)
gw-logs:
	@$(call recent_logs,app.kubernetes.io/name=gateway)

# vLLM pods (selected by ai.zlabs.dev/component=vllm)
vllm-logs:
	@$(call recent_logs,ai.zlabs.dev/component=vllm)

# TGI pods (selected by ai.zlabs.dev/component=tgi)
tgi-logs:
	@$(call recent_logs,ai.zlabs.dev/component=tgi)

# $(call stream_logs,<label selector>)
# Stream (-f) the logs of every container of every pod in $(NS) that matches
# the given label selector. Runs until interrupted (Ctrl+C).
stream_logs = $(KUBECTL) -n $(NS) logs -l $(1) --all-containers=true -f

# Gateway pods
follow-gw:
	@$(call stream_logs,app.kubernetes.io/name=gateway)

# vLLM pods
follow-vllm:
	@$(call stream_logs,ai.zlabs.dev/component=vllm)

# TGI pods
follow-tgi:
	@$(call stream_logs,ai.zlabs.dev/component=tgi)