@@ -59,7 +59,15 @@ def load_config():
 openai_key = os.getenv("OPENAI_API_KEY")
 
 # Initialize agents with use_cot=True to ensure CoT is available
-local_agent = LocalRAGAgent(vector_store, use_cot=True) if hf_token else None
+# Default to Ollama qwen2, fall back to Mistral if available
+try:
+    local_agent = LocalRAGAgent(vector_store, model_name="ollama:qwen2", use_cot=True)
+    print("Using Ollama qwen2 as default model")
+except Exception as e:
+    print(f"Could not initialize Ollama qwen2: {str(e)}")
+    local_agent = LocalRAGAgent(vector_store, use_cot=True) if hf_token else None
+    print("Falling back to Local Mistral model" if hf_token else "No local model available")
+
 openai_agent = RAGAgent(vector_store, openai_api_key=openai_key, use_cot=True) if openai_key else None
 
 def process_pdf(file: tempfile._TemporaryFileWrapper) -> str:
@@ -313,20 +321,23 @@ def create_interface():
     if openai_key:
         model_choices.append("OpenAI")
 
+    # Set default model to Ollama - qwen2
+    default_model = "Ollama - qwen2"
+
     # Model Management Tab (First Tab)
     with gr.Tab("Model Management"):
         gr.Markdown("""
         ## Model Management
 
         Download models in advance to prepare them for use in the chat interface.
 
-        ### Hugging Face Models (Default)
+        ### Hugging Face Models
 
-        The system uses Mistral-7B by default. For Hugging Face models (Mistral), you'll need a Hugging Face token in your config.yaml file.
+        For Hugging Face models (Mistral), you'll need a Hugging Face token in your config.yaml file.
 
-        ### Ollama Models (Alternative)
+        ### Ollama Models (Default)
 
-        Ollama models are available as alternatives. For Ollama models, this will pull the model using the Ollama client.
+        Ollama models are used by default. For Ollama models, this will pull the model using the Ollama client.
         Make sure Ollama is installed and running on your system.
         You can download Ollama from [ollama.com/download](https://ollama.com/download)
         """)
@@ -335,7 +346,7 @@ def create_interface():
         with gr.Column():
             model_dropdown = gr.Dropdown(
                 choices=model_choices,
-                value=model_choices[0] if model_choices else None,
+                value=default_model if default_model in model_choices else model_choices[0] if model_choices else None,
                 label="Select Model to Download",
                 interactive=True
             )
@@ -350,6 +361,21 @@ def create_interface():
             gr.Markdown("""
             ### Model Information
 
+            **Ollama - qwen2** (DEFAULT): Alibaba's Qwen2 model via Ollama.
+            - Size: ~4GB
+            - Requires Ollama to be installed and running
+            - High-quality model with good performance
+
+            **Ollama - llama3**: Meta's Llama 3 model via Ollama.
+            - Size: ~4GB
+            - Requires Ollama to be installed and running
+            - Excellent performance and quality
+
+            **Ollama - phi-3**: Microsoft's Phi-3 model via Ollama.
+            - Size: ~4GB
+            - Requires Ollama to be installed and running
+            - Efficient small model with good performance
+
             **Local (Mistral)**: The default Mistral-7B-Instruct-v0.2 model.
             - Size: ~14GB
             - VRAM Required: ~8GB
@@ -364,21 +390,6 @@ def create_interface():
             - Size: ~7GB
             - VRAM Required: ~6GB
             - Balance between quality and memory usage
-
-            **Ollama - llama3**: Meta's Llama 3 model via Ollama.
-            - Size: ~4GB
-            - Requires Ollama to be installed and running
-            - Excellent performance and quality
-
-            **Ollama - phi-3**: Microsoft's Phi-3 model via Ollama.
-            - Size: ~4GB
-            - Requires Ollama to be installed and running
-            - Efficient small model with good performance
-
-            **Ollama - qwen2**: Alibaba's Qwen2 model via Ollama.
-            - Size: ~4GB
-            - Requires Ollama to be installed and running
-            - High-quality model with good performance
             """)
 
384395 # Document Processing Tab
@@ -412,7 +423,7 @@ def create_interface():
         with gr.Column(scale=1):
             standard_agent_dropdown = gr.Dropdown(
                 choices=model_choices,
-                value=model_choices[0] if model_choices else None,
+                value=default_model if default_model in model_choices else model_choices[0] if model_choices else None,
                 label="Select Agent"
             )
         with gr.Column(scale=1):
@@ -441,7 +452,7 @@ def create_interface():
         with gr.Column(scale=1):
             cot_agent_dropdown = gr.Dropdown(
                 choices=model_choices,
-                value=model_choices[0] if model_choices else None,
+                value=default_model if default_model in model_choices else model_choices[0] if model_choices else None,
                 label="Select Agent"
             )
         with gr.Column(scale=1):
@@ -536,7 +547,7 @@ def create_interface():
 
         2. **Standard Chat Interface**:
            - Quick responses without detailed reasoning steps
-           - Select your preferred agent (Local Mistral or OpenAI)
+           - Select your preferred agent (Ollama qwen2 by default)
            - Select which knowledge collection to query:
              - **PDF Collection**: Always searches PDF documents
              - **Repository Collection**: Always searches code repositories
@@ -551,19 +562,36 @@ def create_interface():
            - Same collection selection options as the Standard Chat Interface
 
         4. **Performance Expectations**:
+           - **Ollama models**: Typically faster inference, default is qwen2
            - **Local (Mistral) model**: Initial loading takes 1-5 minutes, each query takes 30-60 seconds
-           - **OpenAI model**: Much faster responses, typically a few seconds per query
-           - Chain of Thought reasoning takes longer for both models
+           - **OpenAI model**: Fast responses, typically a few seconds per query
+           - Chain of Thought reasoning takes longer for all models
 
-        Note: OpenAI agent requires an API key in `.env` file
+        Note: The interface will automatically detect available models based on your configuration:
+        - Ollama models are the default option (requires Ollama to be installed and running)
+        - Local Mistral model requires HuggingFace token in `config.yaml` (fallback option)
+        - OpenAI model requires API key in `.env` file
         """)
 
     return interface
 
 def main():
     # Check configuration
+    try:
+        import ollama
+        try:
+            # Check if Ollama is running and qwen2 is available
+            models = ollama.list().models
+            available_models = [model.model for model in models]
+            if "qwen2" not in available_models and "qwen2:latest" not in available_models:
+                print("⚠️ Warning: Ollama is running but qwen2 model is not available. Please run 'ollama pull qwen2' or download through the interface.")
+        except Exception:
+            print("⚠️ Warning: Ollama is installed but not running or encountered an error. The default model may not work.")
+    except ImportError:
+        print("⚠️ Warning: Ollama package not installed. Please install with: pip install ollama")
+
     if not hf_token and not openai_key:
-        print("⚠️ Warning: Neither HuggingFace token nor OpenAI key found. Please configure at least one.")
+        print("⚠️ Warning: Neither HuggingFace token nor OpenAI key found. Using Ollama only.")
 
     # Launch interface
     interface = create_interface()
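
For a quick standalone sanity check of the new default outside the app, the availability probe this commit adds to `main()` can be run on its own. A minimal sketch, assuming the `ollama` Python package (a version where `list()` returns typed model entries, as the code above relies on) and a running Ollama daemon; the `pull` mirrors what the Model Management tab does:

```python
import ollama

# Ask the local Ollama daemon which models it has (raises if the daemon is down).
available = [m.model for m in ollama.list().models]
print("Available models:", available)

# Download the default model if it is missing, covering both "qwen2" and "qwen2:latest".
if not any(name.split(":")[0] == "qwen2" for name in available):
    ollama.pull("qwen2")
```

If the probe raises, the app still starts: per the initialization block at the top of the diff, `local_agent` simply falls back to Mistral (when a HuggingFace token is configured) or `None`.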