@@ -140,53 +140,37 @@ def chat(message: str, history: List[List[str]], agent_type: str, use_cot: bool,
     elif "8-bit" in agent_type:
         quantization = "8bit"
         model_type = "Local (Mistral)"
-    elif "Ollama" in agent_type:
-        model_type = "Ollama"
-        # Extract model name from agent_type and use correct Ollama model names
-        if "llama3" in agent_type.lower():
-            model_name = "ollama:llama3"
-        elif "phi-3" in agent_type.lower():
-            model_name = "ollama:phi3"
-        elif "qwen2" in agent_type.lower():
-            model_name = "ollama:qwen2"
+    elif agent_type == "openai":
+        model_type = "OpenAI"
     else:
-        model_type = agent_type
+        # All other models are treated as Ollama models
+        model_type = "Ollama"
+        model_name = agent_type
 
     # Select appropriate agent and reinitialize with correct settings
-    if "Local" in model_type:
+    if model_type == "OpenAI":
+        if not openai_key:
+            response_text = "OpenAI key not found. Please check your config."
+            print(f"Error: {response_text}")
+            return history + [[message, response_text]]
+        agent = RAGAgent(vector_store, openai_api_key=openai_key, use_cot=use_cot,
+                         collection=collection, skip_analysis=skip_analysis)
+    elif model_type == "Local (Mistral)":
         # For HF models, we need the token
         if not hf_token:
             response_text = "Local agent not available. Please check your HuggingFace token configuration."
             print(f"Error: {response_text}")
             return history + [[message, response_text]]
         agent = LocalRAGAgent(vector_store, use_cot=use_cot, collection=collection,
                               skip_analysis=skip_analysis, quantization=quantization)
-    elif model_type == "Ollama":
-        # For Ollama models
-        if model_name:
-            try:
-                agent = LocalRAGAgent(vector_store, model_name=model_name, use_cot=use_cot,
-                                      collection=collection, skip_analysis=skip_analysis)
-            except Exception as e:
-                response_text = f"Error initializing Ollama model: {str(e)}. Falling back to Local Mistral."
-                print(f"Error: {response_text}")
-                # Fall back to Mistral if Ollama fails
-                if hf_token:
-                    agent = LocalRAGAgent(vector_store, use_cot=use_cot, collection=collection,
-                                          skip_analysis=skip_analysis)
-                else:
-                    return history + [[message, "Local Mistral agent not available for fallback. Please check your HuggingFace token configuration."]]
-        else:
-            response_text = "Ollama model not specified correctly."
-            print(f"Error: {response_text}")
-            return history + [[message, response_text]]
-    else:
-        if not openai_key:
-            response_text = "OpenAI agent not available. Please check your OpenAI API key configuration."
+    else:  # Ollama models
+        try:
+            agent = LocalRAGAgent(vector_store, model_name=model_name, use_cot=use_cot,
+                                  collection=collection, skip_analysis=skip_analysis)
+        except Exception as e:
+            response_text = f"Error initializing Ollama model: {str(e)}"
             print(f"Error: {response_text}")
             return history + [[message, response_text]]
-        agent = RAGAgent(vector_store, openai_api_key=openai_key, use_cot=use_cot,
-                         collection=collection, skip_analysis=skip_analysis)
 
     # Process query and get response
     print("Processing query...")
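The dispatch above now reduces to one rule: an explicit "openai" choice, the Mistral quantization variants, and everything else forwarded to Ollama verbatim. A minimal sketch of that rule as a testable helper (`resolve_agent_kwargs` is a hypothetical name, and the `"4bit"` value is assumed by analogy with the `"8bit"` branch shown in the hunk):

```python
from typing import Optional, Tuple

def resolve_agent_kwargs(agent_type: str) -> Tuple[str, Optional[str], Optional[str]]:
    """Map a UI agent_type string to (model_type, model_name, quantization)."""
    if "4-bit" in agent_type:
        return "Local (Mistral)", None, "4bit"  # assumed, mirroring the 8-bit branch
    if "8-bit" in agent_type:
        return "Local (Mistral)", None, "8bit"
    if agent_type == "openai":
        return "OpenAI", None, None
    # Any other string is treated as an Ollama model name, verbatim.
    return "Ollama", agent_type, None

assert resolve_agent_kwargs("qwq") == ("Ollama", "qwq", None)
assert resolve_agent_kwargs("openai") == ("OpenAI", None, None)
```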
@@ -305,92 +289,62 @@ def create_interface():
 
     # Create model choices list for reuse
     model_choices = []
-    # HF models first if token is available
-    if hf_token:
-        model_choices.extend([
-            "Local (Mistral)",
-            "Local (Mistral) - 4-bit Quantized",
-            "Local (Mistral) - 8-bit Quantized",
-        ])
-    # Then Ollama models (don't require HF token)
+    # Only Ollama models (no more local Mistral deployments)
     model_choices.extend([
-        "Ollama - llama3",
-        "Ollama - phi-3",
-        "Ollama - qwen2"
+        "qwq",
+        "gemma3",
+        "llama3.3",
+        "phi4",
+        "mistral",
+        "llava",
+        "phi3",
+        "deepseek-r1"
     ])
     if openai_key:
-        model_choices.append("OpenAI")
+        model_choices.append("openai")
 
-    # Set default model to Ollama - qwen2
-    default_model = "Ollama - qwen2"
+    # Set default model to qwq
+    default_model = "qwq"
 
     # Model Management Tab (First Tab)
     with gr.Tab("Model Management"):
         gr.Markdown("""
-        ## Model Management
-
-        Download models in advance to prepare them for use in the chat interface.
-
-        ### Hugging Face Models
-
-        For Hugging Face models (Mistral), you'll need a Hugging Face token in your config.yaml file.
-
-        ### Ollama Models (Default)
-
-        Ollama models are used by default. For Ollama models, this will pull the model using the Ollama client.
-        Make sure Ollama is installed and running on your system.
-        You can download Ollama from [ollama.com/download](https://ollama.com/download)
+        ## Model Selection
+        Choose your preferred model for the conversation.
         """)
 
         with gr.Row():
             with gr.Column():
                 model_dropdown = gr.Dropdown(
                     choices=model_choices,
-                    value=default_model if default_model in model_choices else model_choices[0] if model_choices else None,
-                    label="Select Model to Download",
-                    interactive=True
+                    value=default_model,
+                    label="Select Model",
+                    info="Choose the model to use for the conversation"
                 )
                 download_button = gr.Button("Download Selected Model")
                 model_status = gr.Textbox(
                     label="Download Status",
                     placeholder="Select a model and click Download to begin...",
                     interactive=False
                 )
-
-            with gr.Column():
-                gr.Markdown("""
-                ### Model Information
-
-                **Ollama - qwen2** (DEFAULT): Alibaba's Qwen2 model via Ollama.
-                - Size: ~4GB
-                - Requires Ollama to be installed and running
-                - High-quality model with good performance
-
-                **Ollama - llama3**: Meta's Llama 3 model via Ollama.
-                - Size: ~4GB
-                - Requires Ollama to be installed and running
-                - Excellent performance and quality
-
-                **Ollama - phi-3**: Microsoft's Phi-3 model via Ollama.
-                - Size: ~4GB
-                - Requires Ollama to be installed and running
-                - Efficient small model with good performance
-
-                **Local (Mistral)**: The default Mistral-7B-Instruct-v0.2 model.
-                - Size: ~14GB
-                - VRAM Required: ~8GB
-                - Good balance of quality and speed
-
-                **Local (Mistral) - 4-bit Quantized**: 4-bit quantized version of Mistral-7B.
-                - Size: ~4GB
-                - VRAM Required: ~4GB
-                - Faster inference with minimal quality loss
-
-                **Local (Mistral) - 8-bit Quantized**: 8-bit quantized version of Mistral-7B.
-                - Size: ~7GB
-                - VRAM Required: ~6GB
-                - Balance between quality and memory usage
-                """)
+
+        # Add model FAQ section
+        gr.Markdown("""
+        ## Model FAQ
+
+        | Model | Parameters | Size | Ollama Tag |
+        |-------|------------|------|------------|
+        | qwq | 32B | 20GB | qwq:latest |
+        | gemma3 | 4B | 3.3GB | gemma3:latest |
+        | llama3.3 | 70B | 43GB | llama3.3:latest |
+        | phi4 | 14B | 9.1GB | phi4:latest |
+        | mistral | 7B | 4.1GB | mistral:latest |
+        | llava | 7B | 4.5GB | llava:latest |
+        | phi3 | 4B | 4.0GB | phi3:latest |
+        | deepseek-r1 | 7B | 4.7GB | deepseek-r1:latest |
+
+        Note: All models are available through Ollama. Make sure Ollama is running on your system.
+        """)
 
     # Document Processing Tab
     with gr.Tab("Document Processing"):
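The diff does not show how the Model Management widgets are connected to the `download_model` handler defined further down. A plausible wiring under the names used in this file, as an illustrative sketch rather than the file's actual code:

```python
# Hypothetical wiring of the Model Management tab to the download handler
# defined later in this file; the actual connection is outside this diff.
download_button.click(
    fn=download_model,      # returns a "✅ ..." or "❌ ..." status string
    inputs=model_dropdown,  # selected model name, e.g. "qwq"
    outputs=model_status,   # Textbox that displays the download status
)
```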
@@ -580,13 +534,30 @@ def main():
     try:
         import ollama
         try:
-            # Check if Ollama is running and qwen2 is available
+            # Check if Ollama is running and list available models
             models = ollama.list().models
             available_models = [model.model for model in models]
-            if "qwen2" not in available_models and "qwen2:latest" not in available_models:
-                print("⚠️ Warning: Ollama is running but qwen2 model is not available. Please run 'ollama pull qwen2' or download through the interface.")
-        except Exception:
-            print("⚠️ Warning: Ollama is installed but not running or encountered an error. The default model may not work.")
+
+            # Check if any of the default models offered in the UI are available
+            if "qwq" not in available_models and "qwq:latest" not in available_models and \
+               "mistral" not in available_models and "mistral:latest" not in available_models and \
+               "phi3" not in available_models and "phi3:latest" not in available_models:
+                print("⚠️ Warning: Ollama is running but no default models (qwq, mistral, phi3) are available.")
+                print("Please download a model through the Model Management tab or run:")
+                print("    ollama pull qwq")
+                print("    ollama pull mistral")
+                print("    ollama pull phi3")
+            else:
+                available_default_models = []
+                for model in ["qwq", "mistral", "phi3"]:
+                    if model in available_models or f"{model}:latest" in available_models:
+                        available_default_models.append(model)
+
+                print(f"✅ Ollama is running with available default models: {', '.join(available_default_models)}")
+                print(f"All available models: {', '.join(available_models)}")
+        except Exception as e:
+            print(f"⚠️ Warning: Ollama is installed but not running or encountered an error: {str(e)}")
+            print("Please start Ollama before using the interface.")
     except ImportError:
         print("⚠️ Warning: Ollama package not installed. Please install with: pip install ollama")
 
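The same availability check, factored into a small reusable helper. This is a sketch built on the `ollama.list()` call already used above; `available_defaults` is a hypothetical name:

```python
from typing import List, Optional
import ollama

def available_defaults(wanted: List[str]) -> Optional[List[str]]:
    """Return the subset of `wanted` models already pulled, or None if the daemon is unreachable."""
    try:
        pulled = [m.model for m in ollama.list().models]
    except Exception as e:
        print(f"⚠️ Ollama not reachable: {e}")
        return None
    # Accept both bare names and ":latest"-tagged variants, as main() does.
    return [w for w in wanted if w in pulled or f"{w}:latest" in pulled]

# Example: available_defaults(["qwq", "mistral", "phi3"]) -> e.g. ["phi3"]
```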
@@ -674,17 +645,11 @@ def download_model(model_type: str) -> str:
 
             except Exception as e:
                 return f"❌ Error downloading model: {str(e)}"
-
-        elif "Ollama" in model_type:
+        # All remaining choices are Ollama models
+        else:
             # Extract model name from model_type
-            if "llama3" in model_type.lower():
-                model_name = "llama3"
-            elif "phi-3" in model_type.lower():
-                model_name = "phi3"
-            elif "qwen2" in model_type.lower():
-                model_name = "qwen2"
-            else:
-                return "❌ Error: Unknown Ollama model type"
+            # Strip the legacy 'Ollama - ' prefix, if present, plus surrounding whitespace
+            model_name = model_type.replace("Ollama - ", "").strip()
 
             # Use Ollama to pull the model
             try:
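For reference, a minimal sketch of the pull this try-block performs, using the `ollama` client's streaming mode. Progress payload fields vary across client versions, so they are printed raw here:

```python
import ollama

def pull_model(model_name: str) -> str:
    """Pull an Ollama model by tag, e.g. 'qwq' or 'qwq:latest' (hypothetical helper)."""
    try:
        for part in ollama.pull(model_name, stream=True):
            print(part)  # incremental progress: status text, completed/total bytes
        return f"✅ Successfully pulled {model_name}"
    except Exception as e:
        return f"❌ Error pulling Ollama model: {str(e)}"

# Example: pull_model("qwq")  # equivalent to running `ollama pull qwq`
```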
@@ -732,8 +697,6 @@ def download_model(model_type: str) -> str:
                 return "❌ Error: Could not connect to Ollama. Please make sure Ollama is installed and running."
             except Exception as e:
                 return f"❌ Error pulling Ollama model: {str(e)}"
-        else:
-            return "❌ Error: Unknown model type"
 
     except Exception as e:
         return f"❌ Error: {str(e)}"
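One consequence of the new fall-through branch worth noting: bare dropdown values and legacy "Ollama - " labels now resolve to the same pull target, e.g.:

```python
# Both the new bare names and the legacy prefixed labels map to one Ollama tag.
for label in ("qwq", "Ollama - qwq", "  mistral "):
    print(repr(label), "->", repr(label.replace("Ollama - ", "").strip()))
# 'qwq' -> 'qwq'; 'Ollama - qwq' -> 'qwq'; '  mistral ' -> 'mistral'
```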