Skip to content

Commit 943c330

Browse files
committed
updated prompt before final experiment
1 parent 8dc9116 commit 943c330

File tree

820 files changed

+9190
-12275
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

820 files changed

+9190
-12275
lines changed

.gitignore

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -171,4 +171,15 @@ cython_debug/
171171
.pypirc
172172

173173
/test.py
174-
experiment_results
174+
experiment_results
175+
run_all_evaluations.sh
176+
run_all_renders.sh
177+
run_inference1.sh
178+
run_inference2.sh
179+
run_inference3.sh
180+
run_inference1copy.sh
181+
run_inference2copy.sh
182+
run_inference3copy.sh
183+
summary_outputs
184+
render_logs
185+
debug_images
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
#!/bin/bash
#
# Render progress report.
#
# For every model listed in `models`, prints:
#   1. whether a `python -m cli render` process mentioning the model is running,
#   2. how many files exist under <results dir>/<model>/rendered_images,
#   3. the last five lines of <log dir>/<model>_render.log.
#
# Paths are resolved relative to the current working directory unless
# overridden through the environment:
#   RESULTS_DIR  directory with one sub-directory per model
#                (default: experiment_results)
#   LOG_DIR      directory with the <model>_render.log files
#                (default: render_logs)

set -u

results_dir=${RESULTS_DIR:-experiment_results}
log_dir=${LOG_DIR:-render_logs}

# Array of model names (directories in experiment_results)
models=(
  "gemini-1.5-pro"
  "Qwen3-4B"
  "gpt-4o-mini"
  "gpt-4.1-mini"
  "Phi-3-mini-128k-instruct"
  "Llama-3.1-8B-Instruct"
  "Phi-4-mini-instruct"
  "Meta-Llama-3-8B-Instruct"
  "gemini-2.0-flash"
  "Qwen2.5-7B"
  "gpt-4o"
  "o1-mini"
)

#######################################
# Build the extended regular expression used to recognise the command line
# of a render process for one model.
# Regex metacharacters in the model name (e.g. the dot in "gpt-4.1-mini")
# are escaped, and the name must be followed by the end of the command line
# or by a character that is neither alphanumeric nor "-", so that "gpt-4o"
# does not also match a "gpt-4o-mini" process.
# Arguments: $1 - model name
# Outputs:   the regular expression on stdout
#######################################
render_process_pattern() {
  local escaped
  escaped=$(printf '%s' "$1" | sed 's/[][\.^$*+?(){}|]/\\&/g')
  printf 'python -m cli render.*%s([^[:alnum:]-]|$)\n' "$escaped"
}

#######################################
# List the PIDs of running render processes for one model.
# Arguments: $1 - model name
# Outputs:   space-separated PIDs on a single line; nothing when no process
#            matches or pgrep is unavailable
#######################################
find_render_pids() {
  local pattern pids
  pattern=$(render_process_pattern "$1")
  # pgrep exits non-zero when nothing matches; that is an expected outcome
  # here, so only its stdout is inspected.
  pids=$(pgrep -f "$pattern" 2>/dev/null | tr '\n' ' ')
  pids=${pids% }
  if [[ -n "$pids" ]]; then
    printf '%s\n' "$pids"
  fi
}

#######################################
# Count the regular files below a directory (recursively).
# A missing directory counts as 0 instead of producing a find error.
# Arguments: $1 - directory to inspect
# Outputs:   the number of files on stdout
#######################################
count_files() {
  local dir=$1 count
  if [[ ! -d "$dir" ]]; then
    printf '0\n'
    return 0
  fi
  # Count NUL terminators rather than lines so that unusual file names
  # cannot skew the result.
  count=$(find "$dir" -type f -print0 | tr -cd '\0' | wc -c)
  # Arithmetic expansion strips the padding some wc implementations add.
  printf '%d\n' "$((count))"
}

#######################################
# Print the full progress report for all entries of `models`.
# Globals:   models, results_dir, log_dir (read)
# Outputs:   the report on stdout; a warning on stderr if pgrep is missing
#######################################
main() {
  local model pids log_file
  local total_running=0
  local rule="========================================================"

  printf '%s\n' "$rule"
  printf '%s\n' " RENDER PROGRESS REPORT "
  printf '%s\n' "$rule"

  if ! command -v pgrep >/dev/null 2>&1; then
    printf 'warning: pgrep not found; running processes cannot be detected\n' >&2
  fi

  # Check if processes are running
  printf '\nCHECKING RUNNING PROCESSES:\n'
  for model in "${models[@]}"; do
    pids=$(find_render_pids "$model")
    if [[ -n "$pids" ]]; then
      printf '%s (PID: %s) is running\n' "$model" "$pids"
      total_running=$((total_running + 1))
    else
      printf '%s is not running\n' "$model"
    fi
  done

  printf '\nTotal running processes: %d out of %d\n' \
    "$total_running" "${#models[@]}"

  # Check image output
  printf '\nCHECKING GENERATED IMAGES:\n'
  for model in "${models[@]}"; do
    printf '%s: %s images generated\n' \
      "$model" "$(count_files "${results_dir}/${model}/rendered_images")"
  done

  # Check recent log activity
  printf '\nRECENT LOG ACTIVITY:\n'
  for model in "${models[@]}"; do
    printf '\n--- %s ---\n' "$model"
    log_file="${log_dir}/${model}_render.log"
    if [[ -f "$log_file" ]]; then
      tail -n 5 -- "$log_file"
    else
      printf 'No log file found\n'
    fi
  done

  printf '\n%s\n' "$rule"
}

main "$@"

structeval/cli.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,12 +28,12 @@ def inference(
2828

2929
queries = [
3030
f"""{item['query']}
31-
\n\nIMPORTANT: Only output the required renderable or executable code. You must start the code with <code> and end the code with </code> (they are code block indicators, not HTML tags). No other text output (explanation, comments, etc.) are allowed.
31+
\n\nIMPORTANT: Only output the required output format. You must start the format/code with <|BEGIN_CODE|> and end the format/code with <|END_CODE|>. No other text output (explanation, comments, etc.) are allowed. Do not use markdown code fences.
3232
{"\n\n/no_think" if llm_model_name == "Qwen/Qwen3-4B" else ""}
3333
"""
3434
for item in data
3535
]
36-
36+
3737
if llm_model_name == "Qwen/Qwen3-4B":
3838
print("Qwen3-4B I'm here")
3939

@@ -186,8 +186,9 @@ def wrapper(*args, **kwargs):
186186

187187
return wrapper
188188

189-
StructEvalCLI.run_pipeline = async_to_sync(StructEvalCLI.run_pipeline)
190189
StructEvalCLI.render = async_to_sync(StructEvalCLI.render)
190+
StructEvalCLI.evaluate = async_to_sync(StructEvalCLI.evaluate)
191+
StructEvalCLI.inference = async_to_sync(StructEvalCLI.inference)
191192

192193
fire.Fire(StructEvalCLI)
193194

structeval/debug_images/000100.png

-2.93 KB

structeval/debug_images/000101.png

-711 Bytes

structeval/debug_images/000102.png

27.4 KB

structeval/debug_images/000103.png

-2.25 KB

structeval/debug_images/000104.png

28.1 KB

structeval/debug_images/000105.png

-7.69 KB

structeval/debug_images/000106.png

21.4 KB

0 commit comments

Comments
 (0)