11import streamlit as st
22import requests
33import time
4+ import concurrent .futures
45
56st .set_page_config (page_title = "LLM Comparison" , layout = "wide" )
67
@@ -50,12 +51,16 @@ def remove_model(index):
5051 st .session_state .selected_models .pop (index )
5152
5253for i in range (st .session_state .model_count ):
53- col1 , col2 = st .columns ([0.97 , 0.02 ])
54+ col1 , col2 = st .columns ([0.97 , 0.02 ])
5455 with col1 :
56+ # Ensure the list is long enough
57+ if i >= len (st .session_state .selected_models ):
58+ st .session_state .selected_models .append ("" )
59+
5560 st .session_state .selected_models [i ] = st .selectbox (
5661 f"Model { i + 1 } " ,
5762 models_available ,
58- index = 0 if i >= len (st .session_state .selected_models ) or not st .session_state .selected_models [i ] else models_available .index (st .session_state .selected_models [i ]),
63+ index = 0 if i >= len (st .session_state .selected_models ) or not st .session_state .selected_models [i ] else ( models_available .index (st .session_state .selected_models [i ]) if st . session_state . selected_models [ i ] in models_available else 0 ),
5964 key = f"model_select_{ i } "
6065 )
6166 with col2 :
@@ -72,47 +77,75 @@ def remove_model(index):
with col_run:
    # Primary action button: True for exactly one rerun after a click.
    run_clicked = st.button("Run Models", type="primary")
7479
def query_ollama_model(model_name, prompt_text):
    """Send *prompt_text* to a single local Ollama model and time the call.

    Designed to run inside a thread pool: it never raises. Any failure
    (connection error, HTTP error, bad JSON, timeout) is folded into the
    returned dict so every worker yields the same result shape.

    Parameters
    ----------
    model_name : str
        Name of an installed Ollama model (e.g. "llama3").
    prompt_text : str
        Prompt to send to the /api/generate endpoint.

    Returns
    -------
    dict
        Keys: "model", "duration" (wall-clock seconds, rounded),
        "eval_count" (generated-token count), "eval_rate" (tokens/sec),
        "response" (generated text, or "Error: ..." on failure).
    """
    try:
        start_time = time.time()
        res = requests.post(
            "http://localhost:11434/api/generate",
            json={"model": model_name, "prompt": prompt_text, "stream": False},
            headers={"Content-Type": "application/json"},
            # Without a timeout a wedged server hangs this worker thread
            # forever; allow long generations but bound connect/read waits.
            timeout=(10, 600),
        )
        res.raise_for_status()
        response_data = res.json()
        duration = round(time.time() - start_time, 2)

        content = response_data.get("response", "")
        eval_count = response_data.get("eval_count", len(content.split()))

        # Ollama reports generation time as "eval_duration" in nanoseconds;
        # prefer it for tokens/sec, since wall-clock duration also includes
        # network overhead and model load time. Fall back to wall clock.
        eval_ns = response_data.get("eval_duration", 0)
        if eval_ns:
            eval_rate = round(eval_count / (eval_ns / 1e9), 2)
        elif duration > 0:
            eval_rate = round(eval_count / duration, 2)
        else:
            eval_rate = 0

        return {
            "model": model_name,
            "duration": duration,
            "eval_count": eval_count,
            "eval_rate": eval_rate,
            "response": content,
        }
    except Exception as e:
        # Deliberately broad: this runs in a worker thread and the caller
        # expects a result dict, not an exception. Zeroed metrics plus an
        # "Error: ..." response keep the rendering code branch-free.
        return {
            "model": model_name,
            "duration": 0,
            "eval_count": 0,
            "eval_rate": 0,
            "response": f"Error: {e}",
        }
113+
75114if run_clicked and prompt and selected_models_filtered :
76115 responses = []
77-
78- response_placeholders = [st .empty () for _ in selected_models_filtered ]
79-
80- for i , model in enumerate (selected_models_filtered ):
81- try :
82- with st .spinner (f"Running { model } ..." ):
83- start_time = time .time ()
84- res = requests .post (
85- "http://localhost:11434/api/generate" ,
86- json = {"model" : model , "prompt" : prompt , "stream" : False },
87- headers = {"Content-Type" : "application/json" },
88- )
89- res .raise_for_status ()
90- response_data = res .json ()
91- end_time = time .time ()
92-
93- duration = round (end_time - start_time , 2 )
94- content = response_data .get ("response" , "" )
95- eval_count = response_data .get ("eval_count" , len (content .split ()))
96- eval_rate = response_data .get ("eval_rate" , round (eval_count / duration , 2 ))
97-
98- responses .append ({
99- "model" : model ,
100- "duration" : duration ,
101- "eval_count" : eval_count ,
102- "eval_rate" : eval_rate ,
103- "response" : content
104- })
105- except Exception as e :
106- responses .append ({
107- "model" : model ,
108- "duration" : 0 ,
109- "eval_count" : 0 ,
110- "eval_rate" : 0 ,
111- "response" : f"Error: { e } "
112- })
113-
114- cols = st .columns (len (responses ))
115- for i , res in enumerate (responses ):
116+
117+ # Create placeholders for immediate feedback
118+ response_placeholders = {model : st .empty () for model in selected_models_filtered }
119+
120+ with concurrent .futures .ThreadPoolExecutor (max_workers = len (selected_models_filtered )) as executor :
121+ # Submit tasks to the thread pool
122+ future_to_model = {executor .submit (query_ollama_model , model , prompt ): model for model in selected_models_filtered }
123+
124+ # Iterate as futures complete
125+ for future in concurrent .futures .as_completed (future_to_model ):
126+ model_name = future_to_model [future ]
127+ try :
128+ res = future .result ()
129+ responses .append (res )
130+ except Exception as exc :
131+ responses .append ({
132+ "model" : model_name ,
133+ "duration" : 0 ,
134+ "eval_count" : 0 ,
135+ "eval_rate" : 0 ,
136+ "response" : f"Error: { exc } "
137+ })
138+
139+ # Sort responses by the order of selected models for consistent display
140+ ordered_responses = []
141+ for model in selected_models_filtered :
142+ for res in responses :
143+ if res ["model" ] == model :
144+ ordered_responses .append (res )
145+ break
146+
147+ cols = st .columns (len (ordered_responses ))
148+ for i , res in enumerate (ordered_responses ):
116149 with cols [i ]:
117150 st .markdown (
118151 f"### <span style='color:#3366cc'>{ res ['model' ]} </span>" if i % 2 == 0 else f"### <span style='color:#cc0000'>{ res ['model' ]} </span>" ,
0 commit comments