Commit ae399d9

Queries all selected models at the same time
In the earlier script, even when four models were selected, they were queried one at a time: the script queried the first model, waited for its output, then moved on to the second and waited again, and so on. This change switches to parallel execution so that all selected models are queried at the same time.
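The change is the standard `concurrent.futures` thread-pool pattern, applied in the diff below. As a minimal standalone sketch of that pattern (the `query` stub, the sleep, and the model names are placeholders, not the app's real Ollama call):

```python
import concurrent.futures
import time

def query(model: str, prompt: str) -> str:
    # Placeholder for the app's per-model HTTP request; the sleep simulates latency.
    time.sleep(1)
    return f"{model} answered: {prompt!r}"

models = ["model-a", "model-b", "model-c", "model-d"]  # hypothetical names

with concurrent.futures.ThreadPoolExecutor(max_workers=len(models)) as executor:
    # Submit every query up front instead of awaiting each one in turn.
    futures = {executor.submit(query, m, "Why is the sky blue?"): m for m in models}
    # as_completed() yields each future as soon as its result is ready.
    for future in concurrent.futures.as_completed(futures):
        print(futures[future], "->", future.result())
```

With four simulated one-second calls, the old sequential loop needs about four seconds of wall-clock time, while the pooled version finishes in about one second: total time becomes the slowest single call rather than the sum of all of them.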
1 parent 96ed9a9 commit ae399d9

app.py

Lines changed: 74 additions & 41 deletions
```diff
@@ -1,6 +1,7 @@
 import streamlit as st
 import requests
 import time
+import concurrent.futures
 
 st.set_page_config(page_title="LLM Comparison", layout="wide")
 
```
```diff
@@ -50,12 +51,16 @@ def remove_model(index):
     st.session_state.selected_models.pop(index)
 
 for i in range(st.session_state.model_count):
-    col1, col2 = st.columns([0.97, 0.02])
+    col1, col2 = st.columns([0.97, 0.02])
     with col1:
+        # Ensure the list is long enough
+        if i >= len(st.session_state.selected_models):
+            st.session_state.selected_models.append("")
+
         st.session_state.selected_models[i] = st.selectbox(
             f"Model {i+1}",
             models_available,
-            index=0 if i >= len(st.session_state.selected_models) or not st.session_state.selected_models[i] else models_available.index(st.session_state.selected_models[i]),
+            index=0 if i >= len(st.session_state.selected_models) or not st.session_state.selected_models[i] else (models_available.index(st.session_state.selected_models[i]) if st.session_state.selected_models[i] in models_available else 0),
             key=f"model_select_{i}"
         )
     with col2:
```
```diff
@@ -72,47 +77,75 @@ def remove_model(index):
 with col_run:
     run_clicked = st.button("Run Models", type="primary")
 
+def query_ollama_model(model_name, prompt_text):
+    """Function to query a single Ollama model."""
+    try:
+        start_time = time.time()
+        res = requests.post(
+            "http://localhost:11434/api/generate",
+            json={"model": model_name, "prompt": prompt_text, "stream": False},
+            headers={"Content-Type": "application/json"},
+        )
+        res.raise_for_status()
+        response_data = res.json()
+        end_time = time.time()
+
+        duration = round(end_time - start_time, 2)
+        content = response_data.get("response", "")
+        eval_count = response_data.get("eval_count", len(content.split()))
+        eval_rate = response_data.get("eval_rate", round(eval_count / duration, 2) if duration > 0 else 0)
+
+        return {
+            "model": model_name,
+            "duration": duration,
+            "eval_count": eval_count,
+            "eval_rate": eval_rate,
+            "response": content
+        }
+    except Exception as e:
+        return {
+            "model": model_name,
+            "duration": 0,
+            "eval_count": 0,
+            "eval_rate": 0,
+            "response": f"Error: {e}"
+        }
+
 if run_clicked and prompt and selected_models_filtered:
     responses = []
-
-    response_placeholders = [st.empty() for _ in selected_models_filtered]
-
-    for i, model in enumerate(selected_models_filtered):
-        try:
-            with st.spinner(f"Running {model}..."):
-                start_time = time.time()
-                res = requests.post(
-                    "http://localhost:11434/api/generate",
-                    json={"model": model, "prompt": prompt, "stream": False},
-                    headers={"Content-Type": "application/json"},
-                )
-                res.raise_for_status()
-                response_data = res.json()
-                end_time = time.time()
-
-                duration = round(end_time - start_time, 2)
-                content = response_data.get("response", "")
-                eval_count = response_data.get("eval_count", len(content.split()))
-                eval_rate = response_data.get("eval_rate", round(eval_count / duration, 2))
-
-                responses.append({
-                    "model": model,
-                    "duration": duration,
-                    "eval_count": eval_count,
-                    "eval_rate": eval_rate,
-                    "response": content
-                })
-        except Exception as e:
-            responses.append({
-                "model": model,
-                "duration": 0,
-                "eval_count": 0,
-                "eval_rate": 0,
-                "response": f"Error: {e}"
-            })
-
-    cols = st.columns(len(responses))
-    for i, res in enumerate(responses):
+
+    # Create placeholders for immediate feedback
+    response_placeholders = {model: st.empty() for model in selected_models_filtered}
+
+    with concurrent.futures.ThreadPoolExecutor(max_workers=len(selected_models_filtered)) as executor:
+        # Submit tasks to the thread pool
+        future_to_model = {executor.submit(query_ollama_model, model, prompt): model for model in selected_models_filtered}
+
+        # Iterate as futures complete
+        for future in concurrent.futures.as_completed(future_to_model):
+            model_name = future_to_model[future]
+            try:
+                res = future.result()
+                responses.append(res)
+            except Exception as exc:
+                responses.append({
+                    "model": model_name,
+                    "duration": 0,
+                    "eval_count": 0,
+                    "eval_rate": 0,
+                    "response": f"Error: {exc}"
+                })
+
+    # Sort responses by the order of selected models for consistent display
+    ordered_responses = []
+    for model in selected_models_filtered:
+        for res in responses:
+            if res["model"] == model:
+                ordered_responses.append(res)
+                break
+
+    cols = st.columns(len(ordered_responses))
+    for i, res in enumerate(ordered_responses):
         with cols[i]:
             st.markdown(
                 f"### <span style='color:#3366cc'>{res['model']}</span>" if i % 2 == 0 else f"### <span style='color:#cc0000'>{res['model']}</span>",
```
