TIGER-AI-Lab
diff --git a/‎.gitignore‎
Lines changed: 12 additions & 1 deletion b/‎.gitignore‎
Lines changed: 12 additions & 1 deletion
diff --git a/‎structeval/check_render_progress.sh‎
Lines changed: 56 additions & 0 deletions b/‎structeval/check_render_progress.sh‎
Lines changed: 56 additions & 0 deletions
diff --git a/‎structeval/cli.py‎
Lines changed: 4 additions & 3 deletions b/‎structeval/cli.py‎
Lines changed: 4 additions & 3 deletions
diff --git a/‎structeval/debug_images/000100.png‎
-2.93 KB b/‎structeval/debug_images/000100.png‎
-2.93 KB
diff --git a/‎structeval/debug_images/000101.png‎
-711 Bytes b/‎structeval/debug_images/000101.png‎
-711 Bytes
diff --git a/‎structeval/debug_images/000102.png‎
27.4 KB b/‎structeval/debug_images/000102.png‎
27.4 KB
diff --git a/‎structeval/debug_images/000103.png‎
-2.25 KB b/‎structeval/debug_images/000103.png‎
-2.25 KB
diff --git a/‎structeval/debug_images/000104.png‎
28.1 KB b/‎structeval/debug_images/000104.png‎
28.1 KB
diff --git a/‎structeval/debug_images/000105.png‎
-7.69 KB b/‎structeval/debug_images/000105.png‎
-7.69 KB
diff --git a/‎structeval/debug_images/000106.png‎
21.4 KB b/‎structeval/debug_images/000106.png‎
21.4 KB
@@ -171,4 +171,15 @@ cython_debug/
 .pypirc
 
 /test.py
-experiment_results
+experiment_results
+run_all_evaluations.sh
+run_all_renders.sh
+run_inference1.sh
+run_inference2.sh
+run_inference3.sh
+run_inference1copy.sh
+run_inference2copy.sh
+run_inference3copy.sh
+summary_outputs
+render_logs
+debug_images
@@ -0,0 +1,90 @@
+#!/bin/bash
+#
+# Report the progress of the per-model render jobs.
+#
+# For every model listed below, print:
+#   1. whether a "python -m cli render" process for the model is running,
+#   2. how many files exist under experiment_results/<model>/rendered_images,
+#   3. the last 5 lines of render_logs/<model>_render.log.
+#
+# Paths are relative: run this from the directory that contains
+# experiment_results/ and render_logs/.
+
+set -u
+
+# Array of model names (directories in experiment_results)
+models=(
+  "gemini-1.5-pro"
+  "Qwen3-4B"
+  "gpt-4o-mini"
+  "gpt-4.1-mini"
+  "Phi-3-mini-128k-instruct"
+  "Llama-3.1-8B-Instruct"
+  "Phi-4-mini-instruct"
+  "Meta-Llama-3-8B-Instruct"
+  "gemini-2.0-flash"
+  "Qwen2.5-7B"
+  "gpt-4o"
+  "o1-mini"
+)
+
+# Print the space-separated PIDs of render processes for model $1.
+# pgrep -f matches the full command line instead of parsing ps output.
+# Dots in the name are escaped so they match literally, and the name must
+# not be followed by a letter, digit or "-", so that "gpt-4o" does not also
+# match the "gpt-4o-mini" job.
+# NOTE(review): assumes the render command line has "/", "_", whitespace or
+# end-of-line right after the model name - confirm against the launcher.
+render_pids() {
+  local name=${1//./\\.}
+  pgrep -d ' ' -f "python -m cli render.*${name}"'([^[:alnum:]-]|$)'
+}
+
+echo "========================================================"
+echo "           RENDER PROGRESS REPORT                       "
+echo "========================================================"
+
+# Check if processes are running
+printf '\nCHECKING RUNNING PROCESSES:\n'
+total_running=0
+for model in "${models[@]}"; do
+  pids=$(render_pids "$model")
+  if [[ -n "$pids" ]]; then
+    echo "✅ $model (PID: $pids) is running"
+    # Plain assignment: ((total_running++)) returns non-zero when the old
+    # value is 0, which is a trap if errexit is ever enabled.
+    total_running=$((total_running + 1))
+  else
+    echo "❌ $model is not running"
+  fi
+done
+
+printf '\nTotal running processes: %s out of %s\n' "$total_running" "${#models[@]}"
+
+# Check image output
+printf '\nCHECKING GENERATED IMAGES:\n'
+for model in "${models[@]}"; do
+  img_dir="experiment_results/$model/rendered_images"
+  img_count=0
+  # A model that has not rendered anything yet has no directory; report 0
+  # without letting find print an error.
+  if [[ -d "$img_dir" ]]; then
+    # tr strips the padding some wc implementations add around the count.
+    img_count=$(find "$img_dir" -type f | wc -l | tr -d '[:space:]')
+  fi
+  echo "$model: $img_count images generated"
+done
+
+# Check recent log activity
+printf '\nRECENT LOG ACTIVITY:\n'
+for model in "${models[@]}"; do
+  printf '\n--- %s ---\n' "$model"
+  log_file="render_logs/${model}_render.log"
+  if [[ -f "$log_file" ]]; then
+    tail -n 5 "$log_file"
+  else
+    echo "No log file found"
+  fi
+done
+
+printf '\n========================================================\n'
@@ -28,12 +28,12 @@ def inference(
 
         queries = [
             f"""{item['query']}
-            \n\nIMPORTANT: Only output the required renderable or executable code. You must start the code with <code> and end the code with </code> (they are code block indicators, not HTML tags). No other text output (explanation, comments, etc.) are allowed.
+            \n\nIMPORTANT: Only output the required output format. You must start the format/code with <|BEGIN_CODE|> and end the format/code with  <|END_CODE|>. No other text output (explanation, comments, etc.) are allowed.  Do not use markdown code fences.
             {"\n\n/no_think" if llm_model_name == "Qwen/Qwen3-4B" else ""}
             """
             for item in data
         ]
-
+ 
         if llm_model_name == "Qwen/Qwen3-4B":
             print("Qwen3-4B I'm here")
 
@@ -186,8 +186,9 @@ def wrapper(*args, **kwargs):
 
         return wrapper
 
-    StructEvalCLI.run_pipeline = async_to_sync(StructEvalCLI.run_pipeline)
     StructEvalCLI.render = async_to_sync(StructEvalCLI.render)
+    StructEvalCLI.evaluate = async_to_sync(StructEvalCLI.evaluate)
+    StructEvalCLI.inference = async_to_sync(StructEvalCLI.inference)
 
     fire.Fire(StructEvalCLI)