@@ -59,7 +59,15 @@ def load_config():
 openai_key = os.getenv("OPENAI_API_KEY")

 # Initialize agents with use_cot=True to ensure CoT is available
-local_agent = LocalRAGAgent(vector_store, use_cot=True) if hf_token else None
+# Default to Ollama qwen2, fall back to Mistral if available
+try:
+    local_agent = LocalRAGAgent(vector_store, model_name="ollama:qwen2", use_cot=True)
+    print("Using Ollama qwen2 as default model")
+except Exception as e:
+    print(f"Could not initialize Ollama qwen2: {str(e)}")
+    local_agent = LocalRAGAgent(vector_store, use_cot=True) if hf_token else None
+    print("Falling back to Local Mistral model" if hf_token else "No local model available")
+
 openai_agent = RAGAgent(vector_store, openai_api_key=openai_key, use_cot=True) if openai_key else None

 def process_pdf(file: tempfile._TemporaryFileWrapper) -> str:
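For reviewers tracing the new fallback chain, here is a minimal standalone sketch of the same logic, assuming the repo's `LocalRAGAgent` constructor as shown in the hunk above and the official `ollama` client; `build_local_agent` is an illustrative name, not code from this PR. Probing the Ollama server up front surfaces a dead server at startup rather than on the first query:

```python
import ollama

def build_local_agent(vector_store, hf_token=None):
    """Prefer Ollama qwen2; fall back to HF Mistral if a token exists, else None."""
    try:
        ollama.list()  # raises if the Ollama server is not reachable
        # LocalRAGAgent is the repo's class shown in the hunk above
        return LocalRAGAgent(vector_store, model_name="ollama:qwen2", use_cot=True)
    except Exception as e:
        print(f"Could not initialize Ollama qwen2: {e}")
        if hf_token:
            print("Falling back to Local Mistral model")
            return LocalRAGAgent(vector_store, use_cot=True)
        print("No local model available")
        return None
```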
@@ -313,20 +321,23 @@ def create_interface():
         if openai_key:
             model_choices.append("OpenAI")

+        # Set default model to Ollama - qwen2
+        default_model = "Ollama - qwen2"
+
         # Model Management Tab (First Tab)
         with gr.Tab("Model Management"):
             gr.Markdown("""
             ## Model Management

             Download models in advance to prepare them for use in the chat interface.

-            ### Hugging Face Models (Default)
+            ### Hugging Face Models

-            The system uses Mistral-7B by default. For Hugging Face models (Mistral), you'll need a Hugging Face token in your config.yaml file.
+            For Hugging Face models (Mistral), you'll need a Hugging Face token in your config.yaml file.

-            ### Ollama Models (Alternative)
+            ### Ollama Models (Default)

-            Ollama models are available as alternatives. For Ollama models, this will pull the model using the Ollama client.
+            Ollama models are used by default; downloading one here pulls it through the Ollama client.
             Make sure Ollama is installed and running on your system.
             You can download Ollama from [ollama.com/download](https://ollama.com/download)
             """)
@@ -335,7 +346,7 @@ def create_interface():
             with gr.Column():
                 model_dropdown = gr.Dropdown(
                     choices=model_choices,
-                    value=model_choices[0] if model_choices else None,
+                    value=default_model if default_model in model_choices else model_choices[0] if model_choices else None,
                     label="Select Model to Download",
                     interactive=True
                 )
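The same nested conditional recurs in the two agent dropdowns later in this diff; if it grows any further, a small helper would keep the three call sites in sync. A sketch, with `pick_default` as an illustrative name:

```python
def pick_default(choices, preferred="Ollama - qwen2"):
    """Return the preferred entry when offered, else the first choice, else None."""
    if preferred in choices:
        return preferred
    return choices[0] if choices else None

# usage in each dropdown: gr.Dropdown(choices=model_choices, value=pick_default(model_choices), ...)
```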
@@ -350,6 +361,21 @@ def create_interface():
             gr.Markdown("""
             ### Model Information

+            **Ollama - qwen2** (DEFAULT): Alibaba's Qwen2 model via Ollama.
+            - Size: ~4GB
+            - Requires Ollama to be installed and running
+            - High-quality model with good performance
+
+            **Ollama - llama3**: Meta's Llama 3 model via Ollama.
+            - Size: ~4GB
+            - Requires Ollama to be installed and running
+            - Excellent performance and quality
+
+            **Ollama - phi-3**: Microsoft's Phi-3 model via Ollama.
+            - Size: ~4GB
+            - Requires Ollama to be installed and running
+            - Efficient small model with good performance
+
             **Local (Mistral)**: The default Mistral-7B-Instruct-v0.2 model.
             - Size: ~14GB
             - VRAM Required: ~8GB
@@ -364,21 +390,6 @@ def create_interface():
             - Size: ~7GB
             - VRAM Required: ~6GB
             - Balance between quality and memory usage
-
-            **Ollama - llama3**: Meta's Llama 3 model via Ollama.
-            - Size: ~4GB
-            - Requires Ollama to be installed and running
-            - Excellent performance and quality
-
-            **Ollama - phi-3**: Microsoft's Phi-3 model via Ollama.
-            - Size: ~4GB
-            - Requires Ollama to be installed and running
-            - Efficient small model with good performance
-
-            **Ollama - qwen2**: Alibaba's Qwen2 model via Ollama.
-            - Size: ~4GB
-            - Requires Ollama to be installed and running
-            - High-quality model with good performance
             """)

             # Document Processing Tab
@@ -412,7 +423,7 @@ def create_interface():
                 with gr.Column(scale=1):
                     standard_agent_dropdown = gr.Dropdown(
                         choices=model_choices,
-                        value=model_choices[0] if model_choices else None,
+                        value=default_model if default_model in model_choices else model_choices[0] if model_choices else None,
                         label="Select Agent"
                     )
                 with gr.Column(scale=1):
@@ -441,7 +452,7 @@ def create_interface():
                 with gr.Column(scale=1):
                     cot_agent_dropdown = gr.Dropdown(
                         choices=model_choices,
-                        value=model_choices[0] if model_choices else None,
+                        value=default_model if default_model in model_choices else model_choices[0] if model_choices else None,
                         label="Select Agent"
                     )
                 with gr.Column(scale=1):
@@ -536,7 +547,7 @@ def create_interface():

             2. **Standard Chat Interface**:
             - Quick responses without detailed reasoning steps
-            - Select your preferred agent (Local Mistral or OpenAI)
+            - Select your preferred agent (Ollama qwen2 by default)
             - Select which knowledge collection to query:
               - **PDF Collection**: Always searches PDF documents
               - **Repository Collection**: Always searches code repositories
@@ -551,19 +562,36 @@ def create_interface():
             - Same collection selection options as the Standard Chat Interface

             4. **Performance Expectations**:
+            - **Ollama models**: Typically faster inference; qwen2 is the default
             - **Local (Mistral) model**: Initial loading takes 1-5 minutes, each query takes 30-60 seconds
-            - **OpenAI model**: Much faster responses, typically a few seconds per query
-            - Chain of Thought reasoning takes longer for both models
+            - **OpenAI model**: Fast responses, typically a few seconds per query
+            - Chain of Thought reasoning takes longer for all models

-            Note: OpenAI agent requires an API key in `.env` file
+            Note: The interface will automatically detect available models based on your configuration:
+            - Ollama models are the default option (requires Ollama to be installed and running)
+            - Local Mistral model requires a HuggingFace token in `config.yaml` (fallback option)
+            - OpenAI model requires an API key in `.env` file
             """)

         return interface

 def main():
     # Check configuration
+    try:
+        import ollama
+        try:
+            # Check if Ollama is running and qwen2 is available
+            models = ollama.list().models
+            available_models = [model.model for model in models]
+            if "qwen2" not in available_models and "qwen2:latest" not in available_models:
+                print("⚠️ Warning: Ollama is running but the qwen2 model is not available. Please run 'ollama pull qwen2' or download it through the interface.")
+        except Exception:
+            print("⚠️ Warning: Ollama is installed but not running or encountered an error. The default model may not work.")
+    except ImportError:
+        print("⚠️ Warning: Ollama package not installed. Please install it with: pip install ollama")
+
     if not hf_token and not openai_key:
-        print("⚠️ Warning: Neither HuggingFace token nor OpenAI key found. Please configure at least one.")
+        print("⚠️ Warning: Neither HuggingFace token nor OpenAI key found. Using Ollama only.")

     # Launch interface
     interface = create_interface()
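One caveat on the startup check above: `ollama.list().models` with attribute access on each entry matches the typed responses of recent `ollama` Python client releases; older releases returned plain dicts (with the tag under a `"name"` key), where this would raise and fall into the generic "not running" warning. A version-tolerant variant, as a hedged sketch with an illustrative `installed_ollama_models` helper:

```python
import ollama

def installed_ollama_models() -> list[str]:
    """List local Ollama model tags across old and new client versions."""
    response = ollama.list()
    models = response.models if hasattr(response, "models") else response.get("models", [])
    names = []
    for m in models:
        # Typed entries expose .model; older dict entries used "model" or "name".
        name = getattr(m, "model", None) or (m.get("model") or m.get("name") if isinstance(m, dict) else None)
        if name:
            names.append(name)
    return names

# e.g. replace the membership test with:
# "qwen2" in {n.split(":")[0] for n in installed_ollama_models()}
```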