@@ -140,53 +140,37 @@ def chat(message: str, history: List[List[str]], agent_type: str, use_cot: bool,
     elif "8-bit" in agent_type:
         quantization = "8bit"
         model_type = "Local (Mistral)"
-    elif "Ollama" in agent_type:
-        model_type = "Ollama"
-        # Extract model name from agent_type and use correct Ollama model names
-        if "llama3" in agent_type.lower():
-            model_name = "ollama:llama3"
-        elif "phi-3" in agent_type.lower():
-            model_name = "ollama:phi3"
-        elif "qwen2" in agent_type.lower():
-            model_name = "ollama:qwen2"
+    elif agent_type == "openai":
+        model_type = "OpenAI"
     else:
-        model_type = agent_type
+        # All other models are treated as Ollama models
+        model_type = "Ollama"
+        model_name = agent_type
 
     # Select appropriate agent and reinitialize with correct settings
-    if "Local" in model_type:
+    if model_type == "OpenAI":
+        if not openai_key:
+            response_text = "OpenAI key not found. Please check your config."
+            print(f"Error: {response_text}")
+            return history + [[message, response_text]]
+        agent = RAGAgent(vector_store, openai_api_key=openai_key, use_cot=use_cot,
+                         collection=collection, skip_analysis=skip_analysis)
+    elif model_type == "Local (Mistral)":
         # For HF models, we need the token
         if not hf_token:
             response_text = "Local agent not available. Please check your HuggingFace token configuration."
             print(f"Error: {response_text}")
             return history + [[message, response_text]]
         agent = LocalRAGAgent(vector_store, use_cot=use_cot, collection=collection,
                               skip_analysis=skip_analysis, quantization=quantization)
-    elif model_type == "Ollama":
-        # For Ollama models
-        if model_name:
-            try:
-                agent = LocalRAGAgent(vector_store, model_name=model_name, use_cot=use_cot,
-                                      collection=collection, skip_analysis=skip_analysis)
-            except Exception as e:
-                response_text = f"Error initializing Ollama model: {str(e)}. Falling back to Local Mistral."
-                print(f"Error: {response_text}")
-                # Fall back to Mistral if Ollama fails
-                if hf_token:
-                    agent = LocalRAGAgent(vector_store, use_cot=use_cot, collection=collection,
-                                          skip_analysis=skip_analysis)
-                else:
-                    return history + [[message, "Local Mistral agent not available for fallback. Please check your HuggingFace token configuration."]]
-        else:
-            response_text = "Ollama model not specified correctly."
-            print(f"Error: {response_text}")
-            return history + [[message, response_text]]
-    else:
-        if not openai_key:
-            response_text = "OpenAI agent not available. Please check your OpenAI API key configuration."
+    else:  # Ollama models
+        try:
+            agent = LocalRAGAgent(vector_store, model_name=model_name, use_cot=use_cot,
+                                  collection=collection, skip_analysis=skip_analysis)
+        except Exception as e:
+            response_text = f"Error initializing Ollama model: {str(e)}"
             print(f"Error: {response_text}")
             return history + [[message, response_text]]
-        agent = RAGAgent(vector_store, openai_api_key=openai_key, use_cot=use_cot,
-                         collection=collection, skip_analysis=skip_analysis)
 
     # Process query and get response
     print("Processing query...")
@@ -305,92 +289,62 @@ def create_interface():
 
         # Create model choices list for reuse
         model_choices = []
-        # HF models first if token is available
-        if hf_token:
-            model_choices.extend([
-                "Local (Mistral)",
-                "Local (Mistral) - 4-bit Quantized",
-                "Local (Mistral) - 8-bit Quantized",
-            ])
-        # Then Ollama models (don't require HF token)
+        # Only Ollama models (no more local Mistral deployments)
         model_choices.extend([
-            "Ollama - llama3",
-            "Ollama - phi-3",
-            "Ollama - qwen2"
+            "qwq",
+            "gemma3",
+            "llama3.3",
+            "phi4",
+            "mistral",
+            "llava",
+            "phi3",
+            "deepseek-r1"
         ])
         if openai_key:
-            model_choices.append("OpenAI")
+            model_choices.append("openai")
 
-        # Set default model to Ollama - qwen2
-        default_model = "Ollama - qwen2"
+        # Set default model to qwq
+        default_model = "qwq"
 
         # Model Management Tab (First Tab)
         with gr.Tab("Model Management"):
             gr.Markdown("""
-            ## Model Management
-
-            Download models in advance to prepare them for use in the chat interface.
-
-            ### Hugging Face Models
-
-            For Hugging Face models (Mistral), you'll need a Hugging Face token in your config.yaml file.
-
-            ### Ollama Models (Default)
-
-            Ollama models are used by default. For Ollama models, this will pull the model using the Ollama client.
-            Make sure Ollama is installed and running on your system.
-            You can download Ollama from [ollama.com/download](https://ollama.com/download)
+            ## Model Selection
+            Choose your preferred model for the conversation.
             """)
 
             with gr.Row():
                 with gr.Column():
                     model_dropdown = gr.Dropdown(
                         choices=model_choices,
-                        value=default_model if default_model in model_choices else model_choices[0] if model_choices else None,
-                        label="Select Model to Download",
-                        interactive=True
+                        value=default_model,
+                        label="Select Model",
+                        info="Choose the model to use for the conversation"
                     )
                     download_button = gr.Button("Download Selected Model")
                     model_status = gr.Textbox(
                         label="Download Status",
                         placeholder="Select a model and click Download to begin...",
                         interactive=False
                     )
-
-                with gr.Column():
-                    gr.Markdown("""
-                    ### Model Information
-
-                    **Ollama - qwen2** (DEFAULT): Alibaba's Qwen2 model via Ollama.
-                    - Size: ~4GB
-                    - Requires Ollama to be installed and running
-                    - High-quality model with good performance
-
-                    **Ollama - llama3**: Meta's Llama 3 model via Ollama.
-                    - Size: ~4GB
-                    - Requires Ollama to be installed and running
-                    - Excellent performance and quality
-
-                    **Ollama - phi-3**: Microsoft's Phi-3 model via Ollama.
-                    - Size: ~4GB
-                    - Requires Ollama to be installed and running
-                    - Efficient small model with good performance
-
-                    **Local (Mistral)**: The default Mistral-7B-Instruct-v0.2 model.
-                    - Size: ~14GB
-                    - VRAM Required: ~8GB
-                    - Good balance of quality and speed
-
-                    **Local (Mistral) - 4-bit Quantized**: 4-bit quantized version of Mistral-7B.
-                    - Size: ~4GB
-                    - VRAM Required: ~4GB
-                    - Faster inference with minimal quality loss
-
-                    **Local (Mistral) - 8-bit Quantized**: 8-bit quantized version of Mistral-7B.
-                    - Size: ~7GB
-                    - VRAM Required: ~6GB
-                    - Balance between quality and memory usage
-                    """)
+
+                # Add model FAQ section
+                gr.Markdown("""
+                ## Model FAQ
+
+                | Model | Parameters | Size | Ollama Tag |
+                |-------|------------|------|------------|
+                | qwq | 32B | 20GB | qwq:latest |
+                | gemma3 | 4B | 3.3GB | gemma3:latest |
+                | llama3.3 | 70B | 43GB | llama3.3:latest |
+                | phi4 | 14B | 9.1GB | phi4:latest |
+                | mistral | 7B | 4.1GB | mistral:latest |
+                | llava | 7B | 4.5GB | llava:latest |
+                | phi3 | 4B | 4.0GB | phi3:latest |
+                | deepseek-r1 | 7B | 4.7GB | deepseek-r1:latest |
+
+                Note: All models are available through Ollama. Make sure Ollama is running on your system.
+                """)
 
         # Document Processing Tab
         with gr.Tab("Document Processing"):
@@ -580,13 +534,30 @@ def main():
     try:
         import ollama
         try:
-            # Check if Ollama is running and qwen2 is available
+            # Check if Ollama is running and list available models
             models = ollama.list().models
             available_models = [model.model for model in models]
-            if "qwen2" not in available_models and "qwen2:latest" not in available_models:
-                print("⚠️ Warning: Ollama is running but qwen2 model is not available. Please run 'ollama pull qwen2' or download through the interface.")
-        except Exception:
-            print("⚠️ Warning: Ollama is installed but not running or encountered an error. The default model may not work.")
+
+            # Check if any default models are available
+            if "qwen2" not in available_models and "qwen2:latest" not in available_models and \
+               "llama3" not in available_models and "llama3:latest" not in available_models and \
+               "phi3" not in available_models and "phi3:latest" not in available_models:
+                print("⚠️ Warning: Ollama is running but no default models (qwen2, llama3, phi3) are available.")
+                print("Please download a model through the Model Management tab or run:")
+                print("  ollama pull qwen2")
+                print("  ollama pull llama3")
+                print("  ollama pull phi3")
+            else:
+                available_default_models = []
+                for model in ["qwen2", "llama3", "phi3"]:
+                    if model in available_models or f"{model}:latest" in available_models:
+                        available_default_models.append(model)
+
+                print(f"✅ Ollama is running with available default models: {', '.join(available_default_models)}")
+                print(f"All available models: {', '.join(available_models)}")
+        except Exception as e:
+            print(f"⚠️ Warning: Ollama is installed but not running or encountered an error: {str(e)}")
+            print("Please start Ollama before using the interface.")
     except ImportError:
         print("⚠️ Warning: Ollama package not installed. Please install with: pip install ollama")
 
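A standalone sketch of the startup check above: a model counts as present if either its bare name or its ":latest" tag appears in ollama.list() (same client and default names as in the diff).

# Sketch of the availability check: bare name or ":latest" tag both count.
import ollama

available = [m.model for m in ollama.list().models]
defaults = [name for name in ("qwen2", "llama3", "phi3")
            if name in available or f"{name}:latest" in available]
print(defaults if defaults else "no default models pulled yet")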
@@ -674,17 +645,11 @@ def download_model(model_type: str) -> str:
 
             except Exception as e:
                 return f"❌ Error downloading model: {str(e)}"
-
-        elif "Ollama" in model_type:
+        # all ollama models
+        else:
             # Extract model name from model_type
-            if "llama3" in model_type.lower():
-                model_name = "llama3"
-            elif "phi-3" in model_type.lower():
-                model_name = "phi3"
-            elif "qwen2" in model_type.lower():
-                model_name = "qwen2"
-            else:
-                return "❌ Error: Unknown Ollama model type"
+            # Remove the 'Ollama - ' prefix and any leading/trailing whitespace
+            model_name = model_type.replace("Ollama - ", "").strip()
 
             # Use Ollama to pull the model
             try:
@@ -732,8 +697,6 @@ def download_model(model_type: str) -> str:
                 return "❌ Error: Could not connect to Ollama. Please make sure Ollama is installed and running."
             except Exception as e:
                 return f"❌ Error pulling Ollama model: {str(e)}"
-        else:
-            return "❌ Error: Unknown model type"
 
     except Exception as e:
         return f"❌ Error: {str(e)}"