Skip to content

Commit 3685e6a

Browse files
committed
update catalog
1 parent a1248f3 commit 3685e6a

File tree

5 files changed

+132
-5
lines changed

5 files changed

+132
-5
lines changed

catalog/bge-reranker-v2-m3.yaml

Lines changed: 30 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,30 @@
1+
apiVersion: v1
2+
kind: ModelCatalog
3+
metadata:
4+
name: bge-reranker-v2-m3
5+
display_name: BAAI/bge-reranker-v2-m3
6+
labels:
7+
icon_url: 'https://cdn-thumbnails.huggingface.co/social-thumbnails/BAAI.png'
8+
hf_repo_url: 'https://huggingface.co/BAAI/bge-reranker-v2-m3'
9+
spec:
10+
model:
11+
registry: ''
12+
name: BAAI/bge-reranker-v2-m3
13+
file: model.safetensors
14+
version: latest
15+
task: text-rerank
16+
engine:
17+
engine: vllm
18+
version: v1
19+
resources:
20+
cpu: 1
21+
memory: 1
22+
replicas:
23+
num: 1
24+
deployment_options:
25+
scheduler:
26+
type: pow2
27+
variables:
28+
RAY_SCHEDULER_TYPE: pow2
29+
engine_args:
30+
served_model_name: BAAI/bge-reranker-v2-m3

catalog/nomic-embed-text-v1-gguf.yaml

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -16,13 +16,13 @@ spec:
16 16
engine:
17 17
engine: llama-cpp
18 18
version: v1
19-
resources: {}
19+
resources:
20+
cpu: 1
21+
memory: 1
20 22
replicas:
21 23
num: 1
22 24
deployment_options:
23 25
scheduler:
24-
type: consistent_hash
25-
virtual_nodes: 150
26-
load_factor: 1.25
26+
type: pow2
27 27
variables:
28-
RAY_SCHEDULER_TYPE: consistent_hash
28+
RAY_SCHEDULER_TYPE: pow2

catalog/qwen2-5-3b-instruct-gguf.yaml

Lines changed: 29 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,29 @@
1+
apiVersion: v1
2+
kind: ModelCatalog
3+
metadata:
4+
name: qwen2-5-3b-instruct-gguf
5+
display_name: Qwen/Qwen2.5-3B-Instruct-GGUF
6+
labels:
7+
icon_url: 'https://cdn-thumbnails.huggingface.co/social-thumbnails/Qwen.png'
8+
hf_repo_url: 'https://huggingface.co/Qwen/Qwen2.5-3B-Instruct-GGUF'
9+
spec:
10+
model:
11+
registry: ''
12+
name: Qwen/Qwen2.5-3B-Instruct-GGUF
13+
file: '*8_0.gguf'
14+
version: latest
15+
task: text-generation
16+
engine:
17+
engine: llama-cpp
18+
version: v1
19+
resources:
20+
cpu: 2
21+
memory: 2
22+
replicas:
23+
num: 1
24+
deployment_options:
25+
scheduler:
26+
type: pow2
27+
variables:
28+
RAY_SCHEDULER_TYPE: pow2
29+
engine_args: {}

catalog/qwen3-4b-instruct.yaml

Lines changed: 38 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,38 @@
1+
apiVersion: v1
2+
kind: ModelCatalog
3+
metadata:
4+
name: qwen3-4b-instruct
5+
display_name: Qwen/Qwen3-4B-Instruct-2507
6+
labels:
7+
icon_url: 'https://cdn-thumbnails.huggingface.co/social-thumbnails/Qwen.png'
8+
hf_repo_url: 'https://huggingface.co/Qwen/Qwen3-4B-Instruct-2507'
9+
spec:
10+
model:
11+
registry: ''
12+
name: Qwen/Qwen3-4B-Instruct-2507
13+
file: model-00001-of-00003.safetensors
14+
version: latest
15+
task: text-generation
16+
engine:
17+
engine: vllm
18+
version: v1
19+
resources:
20+
cpu: 2
21+
memory: 2
22+
replicas:
23+
num: 1
24+
deployment_options:
25+
scheduler:
26+
type: consistent_hash
27+
virtual_nodes: 150
28+
load_factor: 1.25
29+
variables:
30+
RAY_SCHEDULER_TYPE: consistent_hash
31+
engine_args:
32+
tensor_parallel_size: 1
33+
max_model_len: 4096
34+
enforce_eager: true
35+
gpu_memory_utilization: 0.95
36+
enable_chunked_prefill: true
37+
tool_call_parser: hermes
38+
served_model_name: Qwen/Qwen3-4B-Instruct-2507

catalog/qwen3-embedding-4b.yaml

Lines changed: 30 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,30 @@
1+
apiVersion: v1
2+
kind: ModelCatalog
3+
metadata:
4+
name: qwen3-embedding-4b
5+
display_name: Qwen/Qwen3-Embedding-4B
6+
labels:
7+
icon_url: 'https://cdn-thumbnails.huggingface.co/social-thumbnails/Qwen.png'
8+
hf_repo_url: 'https://huggingface.co/Qwen/Qwen3-Embedding-4B'
9+
spec:
10+
model:
11+
registry: ''
12+
name: Qwen/Qwen3-Embedding-4B
13+
file: model-00001-of-00002.safetensors
14+
version: latest
15+
task: text-embedding
16+
engine:
17+
engine: vllm
18+
version: v1
19+
resources:
20+
cpu: 1
21+
memory: 1
22+
replicas:
23+
num: 1
24+
deployment_options:
25+
scheduler:
26+
type: pow2
27+
variables:
28+
RAY_SCHEDULER_TYPE: pow2
29+
engine_args:
30+
served_model_name: Qwen/Qwen3-Embedding-4B

0 commit comments

Comments
 (0)