|
14 | 14 | "recipe_flex_shape_memory_size_in_gbs": 16, |
15 | 15 | "recipe_node_boot_volume_size_in_gbs": 200, |
16 | 16 | "recipe_ephemeral_storage_size": 100, |
17 | | - "recipe_image_uri": "docker.io/library/postgres:latest", |
| 17 | + "recipe_image_uri": "docker.io/library/postgres:13", |
18 | 18 | "recipe_container_port": "5432", |
19 | 19 | "recipe_host_port": "5432", |
20 | 20 | "recipe_container_env": [ |
|
47 | 47 | "recipe_flex_shape_memory_size_in_gbs": 16, |
48 | 48 | "recipe_node_boot_volume_size_in_gbs": 200, |
49 | 49 | "recipe_ephemeral_storage_size": 100, |
50 | | - "recipe_image_uri": "docker.io/chromadb/chroma:latest", |
| 50 | + "recipe_image_uri": "docker.io/chromadb/chroma:1.0.20", |
51 | 51 | "recipe_container_port": "8000", |
52 | 52 | "recipe_host_port": "8000", |
53 | 53 | "recipe_container_env": [ |
|
77 | 77 | "recipe_id": "llm_inference_nvidia", |
78 | 78 | "deployment_name": "vllm", |
79 | 79 | "recipe_mode": "service", |
80 | | - "recipe_image_uri": "iad.ocir.io/iduyx1qnmway/corrino-devops-repository:vllmv0.6.6.pos1", |
| 80 | + "recipe_image_uri": "docker.io/vllm/vllm-openai:v0.9.1", |
81 | 81 | "recipe_node_shape": "VM.GPU.A10.2", |
82 | 82 | "input_object_storage": [ |
83 | 83 | { |
|
87 | 87 | "include": ["NousResearch/Meta-Llama-3.1-8B-Instruct"] |
88 | 88 | } |
89 | 89 | ], |
90 | | - "recipe_container_env": [ |
91 | | - { |
92 | | - "key": "tensor_parallel_size", |
93 | | - "value": "2" |
94 | | - }, |
95 | | - { |
96 | | - "key": "model_name", |
97 | | - "value": "NousResearch/Meta-Llama-3.1-8B-Instruct" |
98 | | - }, |
99 | | - { |
100 | | - "key": "Model_Path", |
101 | | - "value": "/models/NousResearch/Meta-Llama-3.1-8B-Instruct" |
102 | | - } |
103 | | - ], |
104 | 90 | "recipe_replica_count": 1, |
105 | 91 | "recipe_container_port": "8000", |
106 | 92 | "recipe_nvidia_gpu_count": 2, |
107 | 93 | "recipe_node_pool_size": 1, |
108 | 94 | "recipe_node_boot_volume_size_in_gbs": 200, |
109 | 95 | "recipe_container_command_args": [ |
110 | 96 | "--model", |
111 | | - "$(Model_Path)", |
| 97 | + "/models/NousResearch/Meta-Llama-3.1-8B-Instruct", |
112 | 98 | "--tensor-parallel-size", |
113 | | - "$(tensor_parallel_size)" |
| 99 | + "2", |
| 100 | + "--served-model-name", |
| 101 | + "Meta-Llama-3.1-8B-Instruct" |
114 | 102 | ], |
115 | 103 | "recipe_ephemeral_storage_size": 100, |
116 | | - "recipe_shared_memory_volume_size_limit_in_mb": 200 |
| 104 | + "recipe_shared_memory_volume_size_limit_in_mb": 200, |
| 105 | + "recipe_readiness_probe_params": { |
| 106 | + "endpoint_path": "/health", |
| 107 | + "port": 8000, |
| 108 | + "scheme": "HTTP", |
| 109 | + "initial_delay_seconds": 20, |
| 110 | + "period_seconds": 30, |
| 111 | + "success_threshold": 1, |
| 112 | + "timeout_seconds": 10 |
| 113 | + } |
117 | 114 | }, |
118 | 115 | "exports": ["internal_dns_name"] |
119 | 116 | }, |
|
129 | 126 | "recipe_flex_shape_memory_size_in_gbs": 16, |
130 | 127 | "recipe_node_boot_volume_size_in_gbs": 200, |
131 | 128 | "recipe_ephemeral_storage_size": 100, |
132 | | - "recipe_image_uri": "docker.io/jaegertracing/jaeger:latest", |
| 129 | + "recipe_image_uri": "docker.io/jaegertracing/jaeger:2.9.0", |
133 | 130 | "recipe_container_port": "16686", |
134 | 131 | "recipe_additional_ingress_ports": [ |
135 | 132 | { |
|
154 | 151 | "recipe_flex_shape_memory_size_in_gbs": 16, |
155 | 152 | "recipe_node_boot_volume_size_in_gbs": 200, |
156 | 153 | "recipe_ephemeral_storage_size": 100, |
157 | | - "recipe_image_uri": "docker.io/llamastack/distribution-postgres-demo:latest", |
| 154 | + "recipe_image_uri": "docker.io/llamastack/distribution-postgres-demo:0.2.18", |
158 | 155 | "recipe_container_port": "8321", |
159 | 156 | "recipe_container_env": [ |
160 | 157 | { |
161 | 158 | "key": "INFERENCE_MODEL", |
162 | | - "value": "/models/NousResearch/Meta-Llama-3.1-8B-Instruct" |
| 159 | + "value": "Meta-Llama-3.1-8B-Instruct" |
163 | 160 | }, |
164 | 161 | { |
165 | 162 | "key": "VLLM_URL", |
|
173 | 170 | "key": "CHROMADB_URL", |
174 | 171 | "value": "http://${chroma.internal_dns_name}:8000" |
175 | 172 | }, |
| 173 | + { |
| 174 | + "key": "ENABLE_POSTGRES", |
| 175 | + "value": "1" |
| 176 | + }, |
176 | 177 | { |
177 | 178 | "key": "POSTGRES_HOST", |
178 | 179 | "value": "${postgres.internal_dns_name}" |
|
198 | 199 | "value": "console,otel_trace" |
199 | 200 | }, |
200 | 201 | { |
201 | | - "key": "OTEL_TRACE_ENDPOINT", |
202 | | - "value": "http://${jaeger.internal_dns_name}/jaeger/v1/traces" |
| 202 | + "key": "OTEL_EXPORTER_OTLP_ENDPOINT", |
| 203 | + "value": "http://${jaeger.internal_dns_name}/jaeger/" |
203 | 204 | } |
204 | 205 | ], |
205 | 206 | "output_object_storage": [ |
|
0 commit comments