File tree Expand file tree Collapse file tree 7 files changed +64
-4
lines changed Expand file tree Collapse file tree 7 files changed +64
-4
lines changed Original file line number Diff line number Diff line change @@ -124,6 +124,7 @@ run: check-env-run
124
124
--env VLLM_API_TOKEN=$(ANSIBLE_CHATBOT_VLLM_API_TOKEN ) \
125
125
--env INFERENCE_MODEL=$(ANSIBLE_CHATBOT_INFERENCE_MODEL ) \
126
126
--env INFERENCE_MODEL_FILTER=$(ANSIBLE_CHATBOT_INFERENCE_MODEL_FILTER ) \
127
+ --env GEMINI_API_KEY=$(GEMINI_API_KEY ) \
127
128
ansible-chatbot-stack:$(ANSIBLE_CHATBOT_VERSION )
128
129
129
130
run-test :
@@ -162,6 +163,7 @@ run-local-db: check-env-run-local-db
162
163
--env VLLM_API_TOKEN=$(ANSIBLE_CHATBOT_VLLM_API_TOKEN ) \
163
164
--env INFERENCE_MODEL=$(ANSIBLE_CHATBOT_INFERENCE_MODEL ) \
164
165
--env INFERENCE_MODEL_FILTER=$(ANSIBLE_CHATBOT_INFERENCE_MODEL_FILTER ) \
166
+ --env GEMINI_API_KEY=$(GEMINI_API_KEY ) \
165
167
ansible-chatbot-stack:$(ANSIBLE_CHATBOT_VERSION )
166
168
167
169
clean :
Original file line number Diff line number Diff line change @@ -154,6 +154,20 @@ Runs basic tests against the local container.
154
154
kubectl apply -f my-chatbot-stack-deploy.yaml
155
155
```
156
156
157
+ ## Appendix - Google Gemini
158
+
159
+ * Please set the environment variable ` GEMINI_API_KEY=<YOUR_API_KEY> `
160
+ * Example of a ` v1/query ` request:
161
+ ``` json
162
+ {
163
+ "query" : " hello" ,
164
+ "system_prompt" : " You are a helpful assistant." ,
165
+ "model" : " gemini/gemini-2.5-flash" ,
166
+ "provider" : " gemini"
167
+ }
168
+ ```
169
+
170
+
157
171
## Appendix - Host clean-up
158
172
159
173
If you have the need for re-building images, apply the following clean-ups right before:
Original file line number Diff line number Diff line change @@ -18,6 +18,10 @@ providers:
18
18
max_tokens : ${env.VLLM_MAX_TOKENS:=4096}
19
19
api_token : ${env.VLLM_API_TOKEN:=fake}
20
20
tls_verify : ${env.VLLM_TLS_VERIFY:=true}
21
+ - provider_id : gemini
22
+ provider_type : remote::gemini
23
+ config :
24
+ api_key : ${env.GEMINI_API_KEY:=fake}
21
25
- provider_id : inline_sentence-transformer
22
26
provider_type : inline::sentence-transformers
23
27
config : {}
@@ -85,6 +89,11 @@ models:
85
89
model_id : ${env.EMBEDDINGS_MODEL:=/.llama/data/distributions/ansible-chatbot/embeddings_model}
86
90
provider_id : inline_sentence-transformer
87
91
model_type : embedding
92
+ - metadata : {}
93
+ model_id : ${env.GEMINI_INFERENCE_MODEL:=gemini/gemini-2.5-flash}
94
+ provider_id : gemini
95
+ provider_model_id : gemini/gemini-2.5-flash
96
+ model_type : llm
88
97
shields : []
89
98
vector_dbs :
90
99
- metadata : {}
Original file line number Diff line number Diff line change @@ -18,6 +18,10 @@ providers:
18
18
max_tokens : ${env.VLLM_MAX_TOKENS:=4096}
19
19
api_token : ${env.VLLM_API_TOKEN:=fake}
20
20
tls_verify : ${env.VLLM_TLS_VERIFY:=true}
21
+ - provider_id : gemini
22
+ provider_type : remote::gemini
23
+ config :
24
+ api_key : ${env.GEMINI_API_KEY:=fake}
21
25
- provider_id : inline_sentence-transformer
22
26
provider_type : inline::sentence-transformers
23
27
config : {}
@@ -85,6 +89,11 @@ models:
85
89
model_id : ${env.EMBEDDINGS_MODEL:=/.llama/data/distributions/ansible-chatbot/embeddings_model}
86
90
provider_id : inline_sentence-transformer
87
91
model_type : embedding
92
+ - metadata : {}
93
+ model_id : ${env.GEMINI_INFERENCE_MODEL:=gemini/gemini-2.5-flash}
94
+ provider_id : gemini
95
+ provider_model_id : gemini/gemini-2.5-flash
96
+ model_type : llm
88
97
shields : []
89
98
vector_dbs :
90
99
- metadata : {}
Original file line number Diff line number Diff line change @@ -15,6 +15,7 @@ dependencies = [
15
15
" opentelemetry-exporter-otlp~=1.34.1" ,
16
16
" sentence-transformers>=5.0.0" ,
17
17
" sqlalchemy~=2.0.41" ,
18
+ " litellm~=1.75.3" ,
18
19
]
19
20
20
21
[dependency-groups ]
Original file line number Diff line number Diff line change @@ -37,6 +37,7 @@ joblib==1.5.1
37
37
jsonschema == 4.24.0
38
38
jsonschema-specifications == 2025.4.1
39
39
lightspeed-stack-providers == 0.1.14
40
+ litellm == 1.75.5.post1
40
41
llama-api-client == 0.1.2
41
42
llama-stack == 0.2.16
42
43
llama-stack-client == 0.2.16
@@ -62,7 +63,7 @@ nvidia-cusparselt-cu12==0.6.3 ; platform_machine == 'x86_64' and sys_platform ==
62
63
nvidia-nccl-cu12 == 2.26.2 ; platform_machine == 'x86_64' and sys_platform == 'linux'
63
64
nvidia-nvjitlink-cu12 == 12.6.85 ; platform_machine == 'x86_64' and sys_platform == 'linux'
64
65
nvidia-nvtx-cu12 == 12.6.77 ; platform_machine == 'x86_64' and sys_platform == 'linux'
65
- openai == 1.91.0
66
+ openai == 1.99.9
66
67
opentelemetry-api == 1.34.1
67
68
opentelemetry-exporter-otlp == 1.34.1
68
69
opentelemetry-exporter-otlp-proto-common == 1.34.1
You can’t perform that action at this time.
0 commit comments