Commit 87c2538

Merge pull request #37 from oracle-devrel/update
Update
2 parents bb0bce5 + 5033bc1 commit 87c2538

File tree

5 files changed: +707 −213 lines

5 files changed

+707
-213
lines changed

agentic_rag/gradio_app.py

Lines changed: 80 additions & 117 deletions
@@ -140,53 +140,37 @@ def chat(message: str, history: List[List[str]], agent_type: str, use_cot: bool,
     elif "8-bit" in agent_type:
         quantization = "8bit"
         model_type = "Local (Mistral)"
-    elif "Ollama" in agent_type:
-        model_type = "Ollama"
-        # Extract model name from agent_type and use correct Ollama model names
-        if "llama3" in agent_type.lower():
-            model_name = "ollama:llama3"
-        elif "phi-3" in agent_type.lower():
-            model_name = "ollama:phi3"
-        elif "qwen2" in agent_type.lower():
-            model_name = "ollama:qwen2"
+    elif agent_type == "openai":
+        model_type = "OpenAI"
     else:
-        model_type = agent_type
+        # All other models are treated as Ollama models
+        model_type = "Ollama"
+        model_name = agent_type
 
     # Select appropriate agent and reinitialize with correct settings
-    if "Local" in model_type:
+    if model_type == "OpenAI":
+        if not openai_key:
+            response_text = "OpenAI key not found. Please check your config."
+            print(f"Error: {response_text}")
+            return history + [[message, response_text]]
+        agent = RAGAgent(vector_store, openai_api_key=openai_key, use_cot=use_cot,
+                         collection=collection, skip_analysis=skip_analysis)
+    elif model_type == "Local (Mistral)":
         # For HF models, we need the token
         if not hf_token:
             response_text = "Local agent not available. Please check your HuggingFace token configuration."
             print(f"Error: {response_text}")
             return history + [[message, response_text]]
         agent = LocalRAGAgent(vector_store, use_cot=use_cot, collection=collection,
                               skip_analysis=skip_analysis, quantization=quantization)
-    elif model_type == "Ollama":
-        # For Ollama models
-        if model_name:
-            try:
-                agent = LocalRAGAgent(vector_store, model_name=model_name, use_cot=use_cot,
-                                      collection=collection, skip_analysis=skip_analysis)
-            except Exception as e:
-                response_text = f"Error initializing Ollama model: {str(e)}. Falling back to Local Mistral."
-                print(f"Error: {response_text}")
-                # Fall back to Mistral if Ollama fails
-                if hf_token:
-                    agent = LocalRAGAgent(vector_store, use_cot=use_cot, collection=collection,
-                                          skip_analysis=skip_analysis)
-                else:
-                    return history + [[message, "Local Mistral agent not available for fallback. Please check your HuggingFace token configuration."]]
-        else:
-            response_text = "Ollama model not specified correctly."
-            print(f"Error: {response_text}")
-            return history + [[message, response_text]]
-    else:
-        if not openai_key:
-            response_text = "OpenAI agent not available. Please check your OpenAI API key configuration."
+    else:  # Ollama models
+        try:
+            agent = LocalRAGAgent(vector_store, model_name=model_name, use_cot=use_cot,
+                                  collection=collection, skip_analysis=skip_analysis)
+        except Exception as e:
+            response_text = f"Error initializing Ollama model: {str(e)}"
             print(f"Error: {response_text}")
             return history + [[message, response_text]]
-        agent = RAGAgent(vector_store, openai_api_key=openai_key, use_cot=use_cot,
-                         collection=collection, skip_analysis=skip_analysis)
 
     # Process query and get response
     print("Processing query...")
@@ -305,92 +289,62 @@ def create_interface():
 
         # Create model choices list for reuse
         model_choices = []
-        # HF models first if token is available
-        if hf_token:
-            model_choices.extend([
-                "Local (Mistral)",
-                "Local (Mistral) - 4-bit Quantized",
-                "Local (Mistral) - 8-bit Quantized",
-            ])
-        # Then Ollama models (don't require HF token)
+        # Only Ollama models (no more local Mistral deployments)
         model_choices.extend([
-            "Ollama - llama3",
-            "Ollama - phi-3",
-            "Ollama - qwen2"
+            "qwq",
+            "gemma3",
+            "llama3.3",
+            "phi4",
+            "mistral",
+            "llava",
+            "phi3",
+            "deepseek-r1"
         ])
         if openai_key:
-            model_choices.append("OpenAI")
+            model_choices.append("openai")
 
-        # Set default model to Ollama - qwen2
-        default_model = "Ollama - qwen2"
+        # Set default model to qwq
+        default_model = "qwq"
 
         # Model Management Tab (First Tab)
         with gr.Tab("Model Management"):
             gr.Markdown("""
-            ## Model Management
-
-            Download models in advance to prepare them for use in the chat interface.
-
-            ### Hugging Face Models
-
-            For Hugging Face models (Mistral), you'll need a Hugging Face token in your config.yaml file.
-
-            ### Ollama Models (Default)
-
-            Ollama models are used by default. For Ollama models, this will pull the model using the Ollama client.
-            Make sure Ollama is installed and running on your system.
-            You can download Ollama from [ollama.com/download](https://ollama.com/download)
+            ## Model Selection
+            Choose your preferred model for the conversation.
             """)
 
             with gr.Row():
                 with gr.Column():
                     model_dropdown = gr.Dropdown(
                         choices=model_choices,
-                        value=default_model if default_model in model_choices else model_choices[0] if model_choices else None,
-                        label="Select Model to Download",
-                        interactive=True
+                        value=default_model,
+                        label="Select Model",
+                        info="Choose the model to use for the conversation"
                     )
                     download_button = gr.Button("Download Selected Model")
                     model_status = gr.Textbox(
                         label="Download Status",
                         placeholder="Select a model and click Download to begin...",
                         interactive=False
                     )
-
-                with gr.Column():
-                    gr.Markdown("""
-                    ### Model Information
-
-                    **Ollama - qwen2** (DEFAULT): Alibaba's Qwen2 model via Ollama.
-                    - Size: ~4GB
-                    - Requires Ollama to be installed and running
-                    - High-quality model with good performance
-
-                    **Ollama - llama3**: Meta's Llama 3 model via Ollama.
-                    - Size: ~4GB
-                    - Requires Ollama to be installed and running
-                    - Excellent performance and quality
-
-                    **Ollama - phi-3**: Microsoft's Phi-3 model via Ollama.
-                    - Size: ~4GB
-                    - Requires Ollama to be installed and running
-                    - Efficient small model with good performance
-
-                    **Local (Mistral)**: The default Mistral-7B-Instruct-v0.2 model.
-                    - Size: ~14GB
-                    - VRAM Required: ~8GB
-                    - Good balance of quality and speed
-
-                    **Local (Mistral) - 4-bit Quantized**: 4-bit quantized version of Mistral-7B.
-                    - Size: ~4GB
-                    - VRAM Required: ~4GB
-                    - Faster inference with minimal quality loss
-
-                    **Local (Mistral) - 8-bit Quantized**: 8-bit quantized version of Mistral-7B.
-                    - Size: ~7GB
-                    - VRAM Required: ~6GB
-                    - Balance between quality and memory usage
-                    """)
+
+            # Add model FAQ section
+            gr.Markdown("""
+            ## Model FAQ
+
+            | Model | Parameters | Size | Ollama Tag |
+            |-------|------------|------|------------|
+            | qwq | 32B | 20GB | qwq:latest |
+            | gemma3 | 4B | 3.3GB | gemma3:latest |
+            | llama3.3 | 70B | 43GB | llama3.3:latest |
+            | phi4 | 14B | 9.1GB | phi4:latest |
+            | mistral | 7B | 4.1GB | mistral:latest |
+            | llava | 7B | 4.5GB | llava:latest |
+            | phi3 | 4B | 4.0GB | phi3:latest |
+            | deepseek-r1 | 7B | 4.7GB | deepseek-r1:latest |
+
+            Note: All models are available through Ollama. Make sure Ollama is running on your system.
+            """)
 
         # Document Processing Tab
         with gr.Tab("Document Processing"):
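Each row in the FAQ table above names an Ollama tag that can be pulled ahead of time. A quick sketch using the ollama Python client (the same client the app uses in the startup check below); it assumes `pip install ollama` and a running local Ollama server:

```python
# Sketch: pre-pull one of the tags from the FAQ table and confirm it
# appears in the local model list. Assumes a running Ollama server.
import ollama

ollama.pull("phi3:latest")  # fetches the model; fast if already present
tags = [m.model for m in ollama.list().models]
print("phi3 available:", any(t.startswith("phi3") for t in tags))
```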
@@ -580,13 +534,30 @@ def main():
     try:
         import ollama
         try:
-            # Check if Ollama is running and qwen2 is available
+            # Check if Ollama is running and list available models
             models = ollama.list().models
             available_models = [model.model for model in models]
-            if "qwen2" not in available_models and "qwen2:latest" not in available_models:
-                print("⚠️ Warning: Ollama is running but qwen2 model is not available. Please run 'ollama pull qwen2' or download through the interface.")
-        except Exception:
-            print("⚠️ Warning: Ollama is installed but not running or encountered an error. The default model may not work.")
+
+            # Check if any default models are available
+            if "qwen2" not in available_models and "qwen2:latest" not in available_models and \
+               "llama3" not in available_models and "llama3:latest" not in available_models and \
+               "phi3" not in available_models and "phi3:latest" not in available_models:
+                print("⚠️ Warning: Ollama is running but no default models (qwen2, llama3, phi3) are available.")
+                print("Please download a model through the Model Management tab or run:")
+                print("  ollama pull qwen2")
+                print("  ollama pull llama3")
+                print("  ollama pull phi3")
+            else:
+                available_default_models = []
+                for model in ["qwen2", "llama3", "phi3"]:
+                    if model in available_models or f"{model}:latest" in available_models:
+                        available_default_models.append(model)
+
+                print(f"✅ Ollama is running with available default models: {', '.join(available_default_models)}")
+                print(f"All available models: {', '.join(available_models)}")
+        except Exception as e:
+            print(f"⚠️ Warning: Ollama is installed but not running or encountered an error: {str(e)}")
+            print("Please start Ollama before using the interface.")
     except ImportError:
         print("⚠️ Warning: Ollama package not installed. Please install with: pip install ollama")
 
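The availability check above spells out the `name` / `name:latest` pair for every default model. The same test factored into a small helper, purely as an illustration (`has_model` is hypothetical, not part of the codebase):

```python
# Illustration only: has_model is a hypothetical helper, not in gradio_app.py.
def has_model(name: str, available_models: list) -> bool:
    """True if `name` or `name:latest` appears in the ollama.list() output."""
    return name in available_models or f"{name}:latest" in available_models

available = ["phi3:latest", "mistral:latest"]
print([m for m in ("qwen2", "llama3", "phi3") if has_model(m, available)])
# -> ['phi3']
```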
@@ -674,17 +645,11 @@ def download_model(model_type: str) -> str:
 
         except Exception as e:
             return f"❌ Error downloading model: {str(e)}"
-
-    elif "Ollama" in model_type:
+    # all ollama models
+    else:
         # Extract model name from model_type
-        if "llama3" in model_type.lower():
-            model_name = "llama3"
-        elif "phi-3" in model_type.lower():
-            model_name = "phi3"
-        elif "qwen2" in model_type.lower():
-            model_name = "qwen2"
-        else:
-            return "❌ Error: Unknown Ollama model type"
+        # Remove the 'Ollama - ' prefix and any leading/trailing whitespace
+        model_name = model_type.replace("Ollama - ", "").strip()
 
         # Use Ollama to pull the model
         try:
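The `replace("Ollama - ", "")` above keeps backward compatibility with the old "Ollama - &lt;name&gt;" dropdown labels while leaving the new bare model names untouched, as this small demonstration shows:

```python
# The replace/strip is a no-op for the new bare names and only rewrites
# legacy "Ollama - <name>" labels.
for label in ("qwq", "Ollama - qwen2", "  mistral  "):
    print(repr(label.replace("Ollama - ", "").strip()))
# 'qwq'
# 'qwen2'
# 'mistral'
```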
@@ -732,8 +697,6 @@ def download_model(model_type: str) -> str:
             return "❌ Error: Could not connect to Ollama. Please make sure Ollama is installed and running."
         except Exception as e:
            return f"❌ Error pulling Ollama model: {str(e)}"
-    else:
-        return "❌ Error: Unknown model type"
 
     except Exception as e:
         return f"❌ Error: {str(e)}"
