@@ -142,13 +142,7 @@ def chat(message: str, history: List[List[str]], agent_type: str, use_cot: bool,
        model_type = "Local (Mistral)"
    elif "Ollama" in agent_type:
        model_type = "Ollama"
-        # Extract model name from agent_type and use correct Ollama model names
-        if "llama3" in agent_type.lower():
-            model_name = "ollama:llama3"
-        elif "phi-3" in agent_type.lower():
-            model_name = "ollama:phi3"
-        elif "qwen2" in agent_type.lower():
-            model_name = "ollama:qwen2"
+        # Model name will be extracted later
    else:
        model_type = agent_type
@@ -161,28 +155,26 @@ def chat(message: str, history: List[List[str]], agent_type: str, use_cot: bool,
            return history + [[message, response_text]]
        agent = LocalRAGAgent(vector_store, use_cot=use_cot, collection=collection,
                              skip_analysis=skip_analysis, quantization=quantization)
-    elif model_type == "Ollama":
+    elif "Ollama" in model_type:
        # For Ollama models
-        if model_name:
-            try:
-                agent = LocalRAGAgent(vector_store, model_name=model_name, use_cot=use_cot,
-                                      collection=collection, skip_analysis=skip_analysis)
-            except Exception as e:
-                response_text = f"Error initializing Ollama model: {str(e)}. Falling back to Local Mistral."
-                print(f"Error: {response_text}")
-                # Fall back to Mistral if Ollama fails
-                if hf_token:
-                    agent = LocalRAGAgent(vector_store, use_cot=use_cot, collection=collection,
-                                          skip_analysis=skip_analysis)
-                else:
-                    return history + [[message, "Local Mistral agent not available for fallback. Please check your HuggingFace token configuration."]]
-        else:
-            response_text = "Ollama model not specified correctly."
+        # Extract model name directly from the model_type
+        model_name = model_type.replace("Ollama - ", "").strip()
+
+        try:
+            agent = LocalRAGAgent(vector_store, model_name=model_name, use_cot=use_cot,
+                                  collection=collection, skip_analysis=skip_analysis)
+        except Exception as e:
+            response_text = f"Error initializing Ollama model: {str(e)}. Falling back to Local Mistral."
            print(f"Error: {response_text}")
-            return history + [[message, response_text]]
+            # Fall back to Mistral if Ollama fails
+            if hf_token:
+                agent = LocalRAGAgent(vector_store, use_cot=use_cot, collection=collection,
+                                      skip_analysis=skip_analysis)
+            else:
+                return history + [[message, "Local Mistral agent not available for fallback. Please check your HuggingFace token configuration."]]
    else:
        if not openai_key:
-            response_text = "OpenAI agent not available. Please check your OpenAI API key configuration."
+            response_text = "OpenAI key not found. Please check your config."
            print(f"Error: {response_text}")
            return history + [[message, response_text]]
        agent = RAGAgent(vector_store, openai_api_key=openai_key, use_cot=use_cot,
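
The new branch above derives the Ollama tag by stripping the "Ollama - " prefix from the dropdown label. Below is a minimal sketch of that normalization, assuming labels follow the "Ollama - <tag>" convention; the helper name and alias table are illustrative only, and the alias covers labels such as "Ollama - phi-3" whose spelling differs from the actual Ollama tag ("phi3").

```python
# Hypothetical helper (not in this commit): normalize a dropdown label such as
# "Ollama - llama3.2:1b" into the tag expected by the Ollama client.
ALIASES = {"phi-3": "phi3"}  # labels whose spelling differs from the Ollama tag

def ollama_model_from_label(label: str) -> str:
    """Strip the 'Ollama - ' prefix and map known aliases to real Ollama tags."""
    name = label.replace("Ollama - ", "").strip()
    return ALIASES.get(name, name)

assert ollama_model_from_label("Ollama - llama3.2:1b") == "llama3.2:1b"
assert ollama_model_from_label("Ollama - phi-3") == "phi3"
```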
@@ -316,7 +308,32 @@ def create_interface():
        model_choices.extend([
            "Ollama - llama3",
            "Ollama - phi-3",
-            "Ollama - qwen2"
+            "Ollama - qwen2",
+            # New Ollama models
+            "Ollama - gemma3:1b",
+            "Ollama - gemma3",
+            "Ollama - gemma3:12b",
+            "Ollama - gemma3:27b",
+            "Ollama - qwq",
+            "Ollama - deepseek-r1",
+            "Ollama - deepseek-r1:671b",
+            "Ollama - llama3.3",
+            "Ollama - llama3.2",
+            "Ollama - llama3.2:1b",
+            "Ollama - llama3.2-vision",
+            "Ollama - llama3.2-vision:90b",
+            "Ollama - llama3.1",
+            "Ollama - llama3.1:405b",
+            "Ollama - phi4",
+            "Ollama - phi4-mini",
+            "Ollama - mistral",
+            "Ollama - moondream",
+            "Ollama - neural-chat",
+            "Ollama - starling-lm",
+            "Ollama - codellama",
+            "Ollama - llama2-uncensored",
+            "Ollama - llava",
+            "Ollama - granite3.2"
        ])
        if openai_key:
            model_choices.append("OpenAI")
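
Many of the newly listed choices may not be pulled locally yet. As a hedged sketch (not part of this commit), the dropdown labels could be cross-checked against `ollama.list()`, which this codebase already uses at startup, to flag installed models; the helper below is hypothetical.

```python
# Hedged sketch (not in this commit): mark dropdown choices that are already
# pulled locally, reusing the ollama.list().models access pattern from main().
import ollama

def annotate_choices(choices):
    try:
        local = {m.model.split(":")[0] for m in ollama.list().models}
    except Exception:
        return choices  # Ollama not running; leave the labels untouched
    annotated = []
    for label in choices:
        name = label.replace("Ollama - ", "").strip().split(":")[0]
        annotated.append(f"{label} (installed)" if name in local else label)
    return annotated
```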
@@ -390,8 +407,88 @@ def create_interface():
            - Size: ~7GB
            - VRAM Required: ~6GB
            - Balance between quality and memory usage
+
+            For a complete list of supported models and specifications, see the **Model FAQ** tab.
            """)

+        # Model FAQ Tab
+        with gr.Tab("Model FAQ"):
+            gr.Markdown("""
+            ## Model Information & Technical Requirements
+
+            This page provides detailed information about all supported models, including size, parameter count, and hardware requirements.
+
+            ### Memory Requirements
+
+            As a general guideline:
+            - You should have at least 8 GB of RAM available to run 7B parameter models
+            - You should have at least 16 GB of RAM available to run 13B parameter models
+            - You should have at least 32 GB of RAM available to run 33B+ parameter models
+            - For vision models, additional memory is required for image processing
+
+            ### Ollama Models
+
+            | Model | Parameters | Size | Download Command |
+            |-------|------------|------|-----------------|
+            | Gemma 3 | 1B | 815MB | ollama run gemma3:1b |
+            | Gemma 3 | 4B | 3.3GB | ollama run gemma3 |
+            | Gemma 3 | 12B | 8.1GB | ollama run gemma3:12b |
+            | Gemma 3 | 27B | 17GB | ollama run gemma3:27b |
+            | QwQ | 32B | 20GB | ollama run qwq |
+            | DeepSeek-R1 | 7B | 4.7GB | ollama run deepseek-r1 |
+            | DeepSeek-R1 | 671B | 404GB | ollama run deepseek-r1:671b |
+            | Llama 3.3 | 70B | 43GB | ollama run llama3.3 |
+            | Llama 3.2 | 3B | 2.0GB | ollama run llama3.2 |
+            | Llama 3.2 | 1B | 1.3GB | ollama run llama3.2:1b |
+            | Llama 3.2 Vision | 11B | 7.9GB | ollama run llama3.2-vision |
+            | Llama 3.2 Vision | 90B | 55GB | ollama run llama3.2-vision:90b |
+            | Llama 3.1 | 8B | 4.7GB | ollama run llama3.1 |
+            | Llama 3.1 | 405B | 231GB | ollama run llama3.1:405b |
+            | Phi 4 | 14B | 9.1GB | ollama run phi4 |
+            | Phi 4 Mini | 3.8B | 2.5GB | ollama run phi4-mini |
+            | Mistral | 7B | 4.1GB | ollama run mistral |
+            | Moondream 2 | 1.4B | 829MB | ollama run moondream |
+            | Neural Chat | 7B | 4.1GB | ollama run neural-chat |
+            | Starling | 7B | 4.1GB | ollama run starling-lm |
+            | Code Llama | 7B | 3.8GB | ollama run codellama |
+            | Llama 2 Uncensored | 7B | 3.8GB | ollama run llama2-uncensored |
+            | LLaVA | 7B | 4.5GB | ollama run llava |
+            | Granite-3.2 | 8B | 4.9GB | ollama run granite3.2 |
+            | Llama 3 | 8B | 4.7GB | ollama run llama3 |
+            | Phi 3 | 4B | 4.0GB | ollama run phi3 |
+            | Qwen 2 | 7B | 4.1GB | ollama run qwen2 |
+
+            ### HuggingFace Models
+
+            | Model | Parameters | Size | Quantization | VRAM Required |
+            |-------|------------|------|--------------|---------------|
+            | Mistral | 7B | 14GB | None | 8GB |
+            | Mistral | 7B | 4GB | 4-bit | 4GB |
+            | Mistral | 7B | 7GB | 8-bit | 6GB |
+
+            ### Recommended Models
+
+            **Best Overall Performance**:
+            - Ollama - llama3
+            - Ollama - llama3.2-vision (for image processing)
+            - Ollama - phi4
+
+            **Best for Limited Hardware (8GB RAM)**:
+            - Ollama - llama3.2:1b
+            - Ollama - gemma3:1b
+            - Ollama - phi4-mini
+            - Ollama - moondream
+
+            **Best for Code Tasks**:
+            - Ollama - codellama
+            - Ollama - deepseek-r1
+
+            **Best for Enterprise Use**:
+            - Ollama - qwen2
+            - Ollama - granite3.2
+            - Ollama - neural-chat
+            """)
+
        # Document Processing Tab
        with gr.Tab("Document Processing"):
            with gr.Row():
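
The RAM guidance and size columns in the FAQ above follow the usual rule of thumb that a model needs roughly parameter count times bytes per weight, plus runtime overhead. A rough, assumption-based calculator (not code from this repo) that lands in the same ballpark as the tables:

```python
# Rough rule-of-thumb sketch (assumptions, not from this repo): memory is about
# parameters * bytes per weight, plus ~20% overhead for KV cache and buffers.
def estimate_gb(params_billion: float, bits_per_weight: int = 4, overhead: float = 1.2) -> float:
    weight_bytes = params_billion * 1e9 * bits_per_weight / 8
    return round(weight_bytes * overhead / 1e9, 1)

print(estimate_gb(7, bits_per_weight=4))   # ~4.2 GB, close to the 4.1GB listed for mistral
print(estimate_gb(7, bits_per_weight=16))  # ~16.8 GB, near the unquantized 14GB Mistral figure
```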
@@ -580,13 +677,30 @@ def main():
    try:
        import ollama
        try:
-            # Check if Ollama is running and qwen2 is available
+            # Check if Ollama is running and list available models
            models = ollama.list().models
            available_models = [model.model for model in models]
-            if "qwen2" not in available_models and "qwen2:latest" not in available_models:
-                print("⚠️ Warning: Ollama is running but qwen2 model is not available. Please run 'ollama pull qwen2' or download through the interface.")
-        except Exception:
-            print("⚠️ Warning: Ollama is installed but not running or encountered an error. The default model may not work.")
+
+            # Check if any default models are available
+            if "qwen2" not in available_models and "qwen2:latest" not in available_models and \
+               "llama3" not in available_models and "llama3:latest" not in available_models and \
+               "phi3" not in available_models and "phi3:latest" not in available_models:
+                print("⚠️ Warning: Ollama is running but no default models (qwen2, llama3, phi3) are available.")
+                print("Please download a model through the Model Management tab or run:")
+                print(" ollama pull qwen2")
+                print(" ollama pull llama3")
+                print(" ollama pull phi3")
+            else:
+                available_default_models = []
+                for model in ["qwen2", "llama3", "phi3"]:
+                    if model in available_models or f"{model}:latest" in available_models:
+                        available_default_models.append(model)
+
+                print(f"✅ Ollama is running with available default models: {', '.join(available_default_models)}")
+                print(f"All available models: {', '.join(available_models)}")
+        except Exception as e:
+            print(f"⚠️ Warning: Ollama is installed but not running or encountered an error: {str(e)}")
+            print("Please start Ollama before using the interface.")
    except ImportError:
        print("⚠️ Warning: Ollama package not installed. Please install with: pip install ollama")
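
The startup check above repeats the `name`/`name:latest` membership test once per default model. A hedged sketch of the same check factored into a small helper (hypothetical, not part of this commit):

```python
# Hypothetical refactor (not in this commit): report which default models are
# present, treating "name" and "name:latest" as equivalent.
def available_defaults(available_models, defaults=("qwen2", "llama3", "phi3")):
    return [m for m in defaults
            if m in available_models or f"{m}:latest" in available_models]

# Possible usage inside main()'s check:
#   found = available_defaults(available_models)
#   if not found:
#       print("⚠️ Warning: Ollama is running but no default models are available.")
#   else:
#       print(f"✅ Available default models: {', '.join(found)}")
```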
@@ -677,14 +791,8 @@ def download_model(model_type: str) -> str:

    elif "Ollama" in model_type:
        # Extract model name from model_type
-        if "llama3" in model_type.lower():
-            model_name = "llama3"
-        elif "phi-3" in model_type.lower():
-            model_name = "phi3"
-        elif "qwen2" in model_type.lower():
-            model_name = "qwen2"
-        else:
-            return "❌ Error: Unknown Ollama model type"
+        # Remove the 'Ollama - ' prefix and any leading/trailing whitespace
+        model_name = model_type.replace("Ollama - ", "").strip()

        # Use Ollama to pull the model
        try:
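
The `try:` block that closes this hunk is truncated in the diff. A hedged sketch of what pulling a model with the official `ollama` Python package typically looks like; the function name is illustrative and not taken from this repository:

```python
# Hedged sketch (the try block is truncated in this diff): pull a model with the
# official `ollama` package and report the outcome as a status string.
import ollama

def pull_model(model_name: str) -> str:
    try:
        ollama.pull(model_name)  # blocks until the download completes
        return f"✅ Successfully pulled {model_name}"
    except Exception as e:
        return f"❌ Error pulling {model_name}: {str(e)}"
```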