
Commit 0c43536

feat: added more supported models
1 parent 7fb6f12 commit 0c43536

2 files changed: +158, -43 lines changed

agentic_rag/gradio_app.py

Lines changed: 147 additions & 39 deletions
@@ -142,13 +142,7 @@ def chat(message: str, history: List[List[str]], agent_type: str, use_cot: bool,
         model_type = "Local (Mistral)"
     elif "Ollama" in agent_type:
         model_type = "Ollama"
-        # Extract model name from agent_type and use correct Ollama model names
-        if "llama3" in agent_type.lower():
-            model_name = "ollama:llama3"
-        elif "phi-3" in agent_type.lower():
-            model_name = "ollama:phi3"
-        elif "qwen2" in agent_type.lower():
-            model_name = "ollama:qwen2"
+        # Model name will be extracted later
     else:
         model_type = agent_type
 
@@ -161,28 +155,26 @@ def chat(message: str, history: List[List[str]], agent_type: str, use_cot: bool,
             return history + [[message, response_text]]
         agent = LocalRAGAgent(vector_store, use_cot=use_cot, collection=collection,
                               skip_analysis=skip_analysis, quantization=quantization)
-    elif model_type == "Ollama":
+    elif "Ollama" in model_type:
         # For Ollama models
-        if model_name:
-            try:
-                agent = LocalRAGAgent(vector_store, model_name=model_name, use_cot=use_cot,
-                                      collection=collection, skip_analysis=skip_analysis)
-            except Exception as e:
-                response_text = f"Error initializing Ollama model: {str(e)}. Falling back to Local Mistral."
-                print(f"Error: {response_text}")
-                # Fall back to Mistral if Ollama fails
-                if hf_token:
-                    agent = LocalRAGAgent(vector_store, use_cot=use_cot, collection=collection,
-                                          skip_analysis=skip_analysis)
-                else:
-                    return history + [[message, "Local Mistral agent not available for fallback. Please check your HuggingFace token configuration."]]
-        else:
-            response_text = "Ollama model not specified correctly."
+        # Extract model name directly from the model_type
+        model_name = model_type.replace("Ollama - ", "").strip()
+
+        try:
+            agent = LocalRAGAgent(vector_store, model_name=model_name, use_cot=use_cot,
+                                  collection=collection, skip_analysis=skip_analysis)
+        except Exception as e:
+            response_text = f"Error initializing Ollama model: {str(e)}. Falling back to Local Mistral."
             print(f"Error: {response_text}")
-            return history + [[message, response_text]]
+            # Fall back to Mistral if Ollama fails
+            if hf_token:
+                agent = LocalRAGAgent(vector_store, use_cot=use_cot, collection=collection,
+                                      skip_analysis=skip_analysis)
+            else:
+                return history + [[message, "Local Mistral agent not available for fallback. Please check your HuggingFace token configuration."]]
     else:
         if not openai_key:
-            response_text = "OpenAI agent not available. Please check your OpenAI API key configuration."
+            response_text = "OpenAI key not found. Please check your config."
             print(f"Error: {response_text}")
             return history + [[message, response_text]]
         agent = RAGAgent(vector_store, openai_api_key=openai_key, use_cot=use_cot,
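Note on the hunk above: instead of mapping a hard-coded set of dropdown labels to "ollama:" names, the handler now derives the model name from the label itself. A minimal sketch of the new extraction (the helper name is illustrative, not part of the commit; the .replace(...).strip() call is the committed logic):

    # Sketch of the label-to-name extraction introduced above.
    def extract_ollama_model_name(model_type: str) -> str:
        # Strip the "Ollama - " UI prefix so any listed model works without a mapping.
        return model_type.replace("Ollama - ", "").strip()

    assert extract_ollama_model_name("Ollama - gemma3:12b") == "gemma3:12b"
    assert extract_ollama_model_name("Ollama - llama3.2-vision:90b") == "llama3.2-vision:90b"

This generic extraction is what lets the new dropdown entries below work without touching the chat handler again.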
@@ -316,7 +308,32 @@ def create_interface():
         model_choices.extend([
             "Ollama - llama3",
             "Ollama - phi-3",
-            "Ollama - qwen2"
+            "Ollama - qwen2",
+            # New Ollama models
+            "Ollama - gemma3:1b",
+            "Ollama - gemma3",
+            "Ollama - gemma3:12b",
+            "Ollama - gemma3:27b",
+            "Ollama - qwq",
+            "Ollama - deepseek-r1",
+            "Ollama - deepseek-r1:671b",
+            "Ollama - llama3.3",
+            "Ollama - llama3.2",
+            "Ollama - llama3.2:1b",
+            "Ollama - llama3.2-vision",
+            "Ollama - llama3.2-vision:90b",
+            "Ollama - llama3.1",
+            "Ollama - llama3.1:405b",
+            "Ollama - phi4",
+            "Ollama - phi4-mini",
+            "Ollama - mistral",
+            "Ollama - moondream",
+            "Ollama - neural-chat",
+            "Ollama - starling-lm",
+            "Ollama - codellama",
+            "Ollama - llama2-uncensored",
+            "Ollama - llava",
+            "Ollama - granite3.2"
         ])
         if openai_key:
             model_choices.append("OpenAI")
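Design note: because the chat handler now strips the "Ollama - " prefix generically, supporting an additional Ollama model is a one-line change, appending another "Ollama - <name>" entry to this list.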
@@ -390,8 +407,88 @@ def create_interface():
             - Size: ~7GB
             - VRAM Required: ~6GB
             - Balance between quality and memory usage
+
+            For a complete list of supported models and specifications, see the **Model FAQ** tab.
             """)
 
+        # Model FAQ Tab
+        with gr.Tab("Model FAQ"):
+            gr.Markdown("""
+            ## Model Information & Technical Requirements
+
+            This page provides detailed information about all supported models, including size, parameter count, and hardware requirements.
+
+            ### Memory Requirements
+
+            As a general guideline:
+            - You should have at least 8 GB of RAM available to run 7B parameter models
+            - You should have at least 16 GB of RAM available to run 13B parameter models
+            - You should have at least 32 GB of RAM available to run 33B+ parameter models
+            - For vision models, additional memory is required for image processing
+
+            ### Ollama Models
+
+            | Model | Parameters | Size | Download Command |
+            |-------|------------|------|------------------|
+            | Gemma 3 | 1B | 815MB | ollama run gemma3:1b |
+            | Gemma 3 | 4B | 3.3GB | ollama run gemma3 |
+            | Gemma 3 | 12B | 8.1GB | ollama run gemma3:12b |
+            | Gemma 3 | 27B | 17GB | ollama run gemma3:27b |
+            | QwQ | 32B | 20GB | ollama run qwq |
+            | DeepSeek-R1 | 7B | 4.7GB | ollama run deepseek-r1 |
+            | DeepSeek-R1 | 671B | 404GB | ollama run deepseek-r1:671b |
+            | Llama 3.3 | 70B | 43GB | ollama run llama3.3 |
+            | Llama 3.2 | 3B | 2.0GB | ollama run llama3.2 |
+            | Llama 3.2 | 1B | 1.3GB | ollama run llama3.2:1b |
+            | Llama 3.2 Vision | 11B | 7.9GB | ollama run llama3.2-vision |
+            | Llama 3.2 Vision | 90B | 55GB | ollama run llama3.2-vision:90b |
+            | Llama 3.1 | 8B | 4.7GB | ollama run llama3.1 |
+            | Llama 3.1 | 405B | 231GB | ollama run llama3.1:405b |
+            | Phi 4 | 14B | 9.1GB | ollama run phi4 |
+            | Phi 4 Mini | 3.8B | 2.5GB | ollama run phi4-mini |
+            | Mistral | 7B | 4.1GB | ollama run mistral |
+            | Moondream 2 | 1.4B | 829MB | ollama run moondream |
+            | Neural Chat | 7B | 4.1GB | ollama run neural-chat |
+            | Starling | 7B | 4.1GB | ollama run starling-lm |
+            | Code Llama | 7B | 3.8GB | ollama run codellama |
+            | Llama 2 Uncensored | 7B | 3.8GB | ollama run llama2-uncensored |
+            | LLaVA | 7B | 4.5GB | ollama run llava |
+            | Granite-3.2 | 8B | 4.9GB | ollama run granite3.2 |
+            | Llama 3 | 8B | 4.7GB | ollama run llama3 |
+            | Phi 3 | 4B | 4.0GB | ollama run phi3 |
+            | Qwen 2 | 7B | 4.1GB | ollama run qwen2 |
+
+            ### HuggingFace Models
+
+            | Model | Parameters | Size | Quantization | VRAM Required |
+            |-------|------------|------|--------------|---------------|
+            | Mistral | 7B | 14GB | None | 8GB |
+            | Mistral | 7B | 4GB | 4-bit | 4GB |
+            | Mistral | 7B | 7GB | 8-bit | 6GB |
+
+            ### Recommended Models
+
+            **Best Overall Performance**:
+            - Ollama - llama3
+            - Ollama - llama3.2-vision (for image processing)
+            - Ollama - phi4
+
+            **Best for Limited Hardware (8GB RAM)**:
+            - Ollama - llama3.2:1b
+            - Ollama - gemma3:1b
+            - Ollama - phi4-mini
+            - Ollama - moondream
+
+            **Best for Code Tasks**:
+            - Ollama - codellama
+            - Ollama - deepseek-r1
+
+            **Best for Enterprise Use**:
+            - Ollama - qwen2
+            - Ollama - granite3.2
+            - Ollama - neural-chat
+            """)
+
         # Document Processing Tab
         with gr.Tab("Document Processing"):
             with gr.Row():
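The FAQ's RAM guideline follows the usual rule of thumb that resident weight size is roughly parameter count times bytes per weight, plus KV-cache and runtime overhead. A rough check of that arithmetic (an illustration, not part of the commit; assumes the ~4-bit quantization Ollama models typically ship with):

    # Rough weight-size arithmetic behind the "8 GB RAM for 7B models" guideline.
    def approx_weight_gb(params_billions: float, bits_per_weight: float) -> float:
        # bytes = params * (bits / 8); for params in billions this is GB directly
        return params_billions * bits_per_weight / 8

    print(approx_weight_gb(7, 4))   # ~3.5 GB of weights -> fits 8 GB RAM with overhead
    print(approx_weight_gb(13, 4))  # ~6.5 GB -> matches the 16 GB guideline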
@@ -580,13 +677,30 @@ def main():
     try:
         import ollama
         try:
-            # Check if Ollama is running and qwen2 is available
+            # Check if Ollama is running and list available models
            models = ollama.list().models
             available_models = [model.model for model in models]
-            if "qwen2" not in available_models and "qwen2:latest" not in available_models:
-                print("⚠️ Warning: Ollama is running but qwen2 model is not available. Please run 'ollama pull qwen2' or download through the interface.")
-        except Exception:
-            print("⚠️ Warning: Ollama is installed but not running or encountered an error. The default model may not work.")
+
+            # Check if any default models are available
+            if "qwen2" not in available_models and "qwen2:latest" not in available_models and \
+               "llama3" not in available_models and "llama3:latest" not in available_models and \
+               "phi3" not in available_models and "phi3:latest" not in available_models:
+                print("⚠️ Warning: Ollama is running but no default models (qwen2, llama3, phi3) are available.")
+                print("Please download a model through the Model Management tab or run:")
+                print("  ollama pull qwen2")
+                print("  ollama pull llama3")
+                print("  ollama pull phi3")
+            else:
+                available_default_models = []
+                for model in ["qwen2", "llama3", "phi3"]:
+                    if model in available_models or f"{model}:latest" in available_models:
+                        available_default_models.append(model)
+
+                print(f"✅ Ollama is running with available default models: {', '.join(available_default_models)}")
+                print(f"All available models: {', '.join(available_models)}")
+        except Exception as e:
+            print(f"⚠️ Warning: Ollama is installed but not running or encountered an error: {str(e)}")
+            print("Please start Ollama before using the interface.")
     except ImportError:
         print("⚠️ Warning: Ollama package not installed. Please install with: pip install ollama")

@@ -677,14 +791,8 @@ def download_model(model_type: str) -> str:
 
     elif "Ollama" in model_type:
         # Extract model name from model_type
-        if "llama3" in model_type.lower():
-            model_name = "llama3"
-        elif "phi-3" in model_type.lower():
-            model_name = "phi3"
-        elif "qwen2" in model_type.lower():
-            model_name = "qwen2"
-        else:
-            return "❌ Error: Unknown Ollama model type"
+        # Remove the 'Ollama - ' prefix and any leading/trailing whitespace
+        model_name = model_type.replace("Ollama - ", "").strip()
 
         # Use Ollama to pull the model
         try:
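The diff truncates at the try: block, so the pull itself is not shown; presumably it goes through the ollama client. A hedged sketch of that step (an assumption about the elided body, using the ollama Python package's pull() call):

    # Sketch of the pull step following the truncated `try:` above (assumed, not shown in the diff).
    import ollama

    model_name = "Ollama - phi4-mini".replace("Ollama - ", "").strip()
    try:
        ollama.pull(model_name)  # e.g. pulls "phi4-mini" from the Ollama registry
        print(f"✅ Successfully downloaded {model_name}")
    except Exception as e:
        print(f"❌ Error downloading {model_name}: {str(e)}")

With the if/elif mapping gone, any "Ollama - <name>" label from the dropdown, including tagged variants like "gemma3:27b", can be pulled without further code changes.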

agentic_rag/local_rag_agent.py

Lines changed: 11 additions & 4 deletions
@@ -57,8 +57,8 @@ def __init__(self, model_name: str):
         Args:
             model_name: Name of the Ollama model to use
         """
-        # Remove the 'ollama:' prefix if present
-        self.model_name = model_name.replace("ollama:", "") if model_name.startswith("ollama:") else model_name
+        # Use the model name directly without any transformation
+        self.model_name = model_name
        self._check_ollama_running()
 
     def _check_ollama_running(self):
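Note: with the prefix-stripping removed here, OllamaModel now expects a bare model name (e.g. "llama3", not "ollama:llama3"); the normalization moves up into LocalRAGAgent.__init__, shown in the next hunk.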
@@ -165,11 +165,18 @@ def __init__(self, vector_store: VectorStore = None, model_name: str = "mistrala
         # skip_analysis parameter kept for backward compatibility but no longer used
 
         # Check if this is an Ollama model
-        self.is_ollama = model_name.startswith("ollama:")
+        self.is_ollama = model_name.startswith("ollama:") or "ollama" in model_name.lower()
 
         if self.is_ollama:
             # Extract the actual model name from the prefix
-            ollama_model_name = model_name.replace("ollama:", "")
+            # If model_name contains 'ollama:' prefix, remove it
+            # If model_name is from gradio interface (e.g., "Ollama - llama3"), extract just the model name
+            if model_name.startswith("ollama:"):
+                ollama_model_name = model_name.replace("ollama:", "")
+            elif "Ollama - " in model_name:
+                ollama_model_name = model_name.replace("Ollama - ", "")
+            else:
+                ollama_model_name = model_name
 
             # Load Ollama model
             print("\nLoading Ollama model...")
