Commit aab4bf4
Set ollama:qwen2 as default model throughout the application
1 parent af37695
2 files changed: 67 additions, 40 deletions

agentic_rag/gradio_app.py (56 additions, 28 deletions)

```diff
@@ -59,7 +59,15 @@ def load_config():
 openai_key = os.getenv("OPENAI_API_KEY")
 
 # Initialize agents with use_cot=True to ensure CoT is available
-local_agent = LocalRAGAgent(vector_store, use_cot=True) if hf_token else None
+# Default to Ollama qwen2, fall back to Mistral if available
+try:
+    local_agent = LocalRAGAgent(vector_store, model_name="ollama:qwen2", use_cot=True)
+    print("Using Ollama qwen2 as default model")
+except Exception as e:
+    print(f"Could not initialize Ollama qwen2: {str(e)}")
+    local_agent = LocalRAGAgent(vector_store, use_cot=True) if hf_token else None
+    print("Falling back to Local Mistral model" if hf_token else "No local model available")
+
 openai_agent = RAGAgent(vector_store, openai_api_key=openai_key, use_cot=True) if openai_key else None
 
 def process_pdf(file: tempfile._TemporaryFileWrapper) -> str:
@@ -313,20 +321,23 @@ def create_interface():
     if openai_key:
         model_choices.append("OpenAI")
 
+    # Set default model to Ollama - qwen2
+    default_model = "Ollama - qwen2"
+
     # Model Management Tab (First Tab)
     with gr.Tab("Model Management"):
         gr.Markdown("""
         ## Model Management
 
         Download models in advance to prepare them for use in the chat interface.
 
-        ### Hugging Face Models (Default)
+        ### Hugging Face Models
 
-        The system uses Mistral-7B by default. For Hugging Face models (Mistral), you'll need a Hugging Face token in your config.yaml file.
+        For Hugging Face models (Mistral), you'll need a Hugging Face token in your config.yaml file.
 
-        ### Ollama Models (Alternative)
+        ### Ollama Models (Default)
 
-        Ollama models are available as alternatives. For Ollama models, this will pull the model using the Ollama client.
+        Ollama models are used by default. For Ollama models, this will pull the model using the Ollama client.
         Make sure Ollama is installed and running on your system.
         You can download Ollama from [ollama.com/download](https://ollama.com/download)
         """)
@@ -335,7 +346,7 @@ def create_interface():
         with gr.Column():
             model_dropdown = gr.Dropdown(
                 choices=model_choices,
-                value=model_choices[0] if model_choices else None,
+                value=default_model if default_model in model_choices else model_choices[0] if model_choices else None,
                 label="Select Model to Download",
                 interactive=True
             )
@@ -350,6 +361,21 @@ def create_interface():
         gr.Markdown("""
         ### Model Information
 
+        **Ollama - qwen2** (DEFAULT): Alibaba's Qwen2 model via Ollama.
+        - Size: ~4GB
+        - Requires Ollama to be installed and running
+        - High-quality model with good performance
+
+        **Ollama - llama3**: Meta's Llama 3 model via Ollama.
+        - Size: ~4GB
+        - Requires Ollama to be installed and running
+        - Excellent performance and quality
+
+        **Ollama - phi-3**: Microsoft's Phi-3 model via Ollama.
+        - Size: ~4GB
+        - Requires Ollama to be installed and running
+        - Efficient small model with good performance
+
         **Local (Mistral)**: The default Mistral-7B-Instruct-v0.2 model.
         - Size: ~14GB
         - VRAM Required: ~8GB
@@ -364,21 +390,6 @@ def create_interface():
         - Size: ~7GB
         - VRAM Required: ~6GB
         - Balance between quality and memory usage
-
-        **Ollama - llama3**: Meta's Llama 3 model via Ollama.
-        - Size: ~4GB
-        - Requires Ollama to be installed and running
-        - Excellent performance and quality
-
-        **Ollama - phi-3**: Microsoft's Phi-3 model via Ollama.
-        - Size: ~4GB
-        - Requires Ollama to be installed and running
-        - Efficient small model with good performance
-
-        **Ollama - qwen2**: Alibaba's Qwen2 model via Ollama.
-        - Size: ~4GB
-        - Requires Ollama to be installed and running
-        - High-quality model with good performance
         """)
 
     # Document Processing Tab
@@ -412,7 +423,7 @@ def create_interface():
         with gr.Column(scale=1):
             standard_agent_dropdown = gr.Dropdown(
                 choices=model_choices,
-                value=model_choices[0] if model_choices else None,
+                value=default_model if default_model in model_choices else model_choices[0] if model_choices else None,
                 label="Select Agent"
             )
         with gr.Column(scale=1):
@@ -441,7 +452,7 @@ def create_interface():
         with gr.Column(scale=1):
             cot_agent_dropdown = gr.Dropdown(
                 choices=model_choices,
-                value=model_choices[0] if model_choices else None,
+                value=default_model if default_model in model_choices else model_choices[0] if model_choices else None,
                 label="Select Agent"
             )
         with gr.Column(scale=1):
@@ -536,7 +547,7 @@ def create_interface():
 
         2. **Standard Chat Interface**:
            - Quick responses without detailed reasoning steps
-           - Select your preferred agent (Local Mistral or OpenAI)
+           - Select your preferred agent (Ollama qwen2 by default)
            - Select which knowledge collection to query:
              - **PDF Collection**: Always searches PDF documents
              - **Repository Collection**: Always searches code repositories
@@ -551,19 +562,36 @@ def create_interface():
            - Same collection selection options as the Standard Chat Interface
 
         4. **Performance Expectations**:
+           - **Ollama models**: Typically faster inference, default is qwen2
            - **Local (Mistral) model**: Initial loading takes 1-5 minutes, each query takes 30-60 seconds
-           - **OpenAI model**: Much faster responses, typically a few seconds per query
-           - Chain of Thought reasoning takes longer for both models
+           - **OpenAI model**: Fast responses, typically a few seconds per query
+           - Chain of Thought reasoning takes longer for all models
 
-        Note: OpenAI agent requires an API key in `.env` file
+        Note: The interface will automatically detect available models based on your configuration:
+        - Ollama models are the default option (requires Ollama to be installed and running)
+        - Local Mistral model requires HuggingFace token in `config.yaml` (fallback option)
+        - OpenAI model requires API key in `.env` file
         """)
 
     return interface
 
 def main():
     # Check configuration
+    try:
+        import ollama
+        try:
+            # Check if Ollama is running and qwen2 is available
+            models = ollama.list().models
+            available_models = [model.model for model in models]
+            if "qwen2" not in available_models and "qwen2:latest" not in available_models:
+                print("⚠️ Warning: Ollama is running but qwen2 model is not available. Please run 'ollama pull qwen2' or download through the interface.")
+        except Exception:
+            print("⚠️ Warning: Ollama is installed but not running or encountered an error. The default model may not work.")
+    except ImportError:
+        print("⚠️ Warning: Ollama package not installed. Please install with: pip install ollama")
+
     if not hf_token and not openai_key:
-        print("⚠️ Warning: Neither HuggingFace token nor OpenAI key found. Please configure at least one.")
+        print("⚠️ Warning: Neither HuggingFace token nor OpenAI key found. Using Ollama only.")
 
     # Launch interface
     interface = create_interface()
```
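Because the new default silently falls back to Mistral when Ollama is unreachable, it can be useful to pre-flight the qwen2 model before launching the app. Below is a minimal, hypothetical sketch built on the same `ollama` client calls the commit uses (`ollama.list()` and its `pull` counterpart); the helper name `ensure_qwen2` is illustrative, not part of the commit:

```python
# Hypothetical pre-flight check; assumes the ollama package (pip install ollama)
# and mirrors the availability check this commit adds to main().
import ollama

def ensure_qwen2() -> bool:
    """Return True if qwen2 is usable locally, pulling it first if necessary."""
    try:
        # ollama.list() raises if the Ollama server is not running.
        available = [m.model for m in ollama.list().models]
        if "qwen2" not in available and "qwen2:latest" not in available:
            print("qwen2 not found locally; pulling (~4GB)...")
            ollama.pull("qwen2")  # equivalent to running 'ollama pull qwen2'
        return True
    except Exception as e:
        # Server down or client misconfigured; gradio_app.py would fall back
        # to the Mistral model here if an HF token is configured.
        print(f"Ollama unavailable: {e}")
        return False

if __name__ == "__main__":
    ensure_qwen2()
```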

agentic_rag/local_rag_agent.py (11 additions, 12 deletions)

```diff
@@ -500,19 +500,18 @@ def _generate_general_response(self, query: str) -> Dict[str, Any]:
         }
 
 def main():
-    parser = argparse.ArgumentParser(description="Query documents using local Mistral model")
-    parser.add_argument("--query", required=True, help="Query to process")
-    parser.add_argument("--store-path", default="embeddings", help="Path to the vector store")
-    parser.add_argument("--model", default="mistralai/Mistral-7B-Instruct-v0.2", help="Model to use")
-    parser.add_argument("--quiet", action="store_true", help="Disable verbose logging")
-    parser.add_argument("--use-cot", action="store_true", help="Enable Chain of Thought reasoning")
-    parser.add_argument("--collection", choices=["PDF Collection", "Repository Collection", "General Knowledge", "Web Knowledge Base"],
-                        help="Specify which collection to query")
-    parser.add_argument("--skip-analysis", action="store_true", help="Skip query analysis step")
+    parser = argparse.ArgumentParser(description="Query documents using local LLM")
+    parser.add_argument("--query", required=True, help="Query to search for")
+    parser.add_argument("--embeddings", default="oracle", choices=["oracle", "chromadb"], help="Embeddings backend to use")
+    parser.add_argument("--model", default="ollama:qwen2", help="Model to use (default: ollama:qwen2)")
+    parser.add_argument("--collection", help="Collection to search (PDF, Repository, General Knowledge)")
+    parser.add_argument("--use-cot", action="store_true", help="Use Chain of Thought reasoning")
+    parser.add_argument("--store-path", default="embeddings", help="Path to ChromaDB store")
+    parser.add_argument("--skip-analysis", action="store_true", help="Skip query analysis (not recommended)")
     parser.add_argument("--verbose", action="store_true", help="Show full content of sources")
-    parser.add_argument("--embeddings", choices=["oracle", "chromadb"], default="oracle",
-                        help="Select embeddings backend (default: oracle)")
-
+    parser.add_argument("--quiet", action="store_true", help="Disable verbose logging")
+    parser.add_argument("--quantization", choices=["4bit", "8bit"], help="Quantization method (4bit or 8bit)")
+
     args = parser.parse_args()
 
     # Set logging level based on quiet flag
```