atchudhansg
diff --git a/‎.gitignore‎
Lines changed: 1 addition & 0 deletions b/‎.gitignore‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎=3.4.0‎
Lines changed: 424 additions & 0 deletions b/‎=3.4.0‎
Lines changed: 424 additions & 0 deletions
diff --git a/‎marooned_env/__pycache__/config.cpython-312.pyc‎
-8 Bytes b/‎marooned_env/__pycache__/config.cpython-312.pyc‎
-8 Bytes
diff --git a/‎marooned_env/__pycache__/environment.cpython-312.pyc‎
65 Bytes b/‎marooned_env/__pycache__/environment.cpython-312.pyc‎
65 Bytes
diff --git a/‎marooned_env/__pycache__/game_state.cpython-312.pyc‎
105 Bytes b/‎marooned_env/__pycache__/game_state.cpython-312.pyc‎
105 Bytes
diff --git a/‎marooned_env/__pycache__/llm_interface.cpython-312.pyc‎
10 Bytes b/‎marooned_env/__pycache__/llm_interface.cpython-312.pyc‎
10 Bytes
diff --git a/‎marooned_env/__pycache__/models.cpython-312.pyc‎
272 Bytes b/‎marooned_env/__pycache__/models.cpython-312.pyc‎
272 Bytes
diff --git a/‎marooned_env/llm_interface.py‎
Lines changed: 15 additions & 12 deletions b/‎marooned_env/llm_interface.py‎
Lines changed: 15 additions & 12 deletions
diff --git a/‎notebooks/Train_Marooned_RL_Clean.ipynb‎
Lines changed: 34 additions & 20 deletions b/‎notebooks/Train_Marooned_RL_Clean.ipynb‎
Lines changed: 34 additions & 20 deletions
@@ -1,5 +1,6 @@
 # Python
 .venv/
+myenv/
 venv/
 __pycache__/
 *.pyc
 
@@ -3,7 +3,7 @@
 ==============================================================
 Convert observations to prompts and validate student LLM outputs using teacher LLM.
 
-Teacher LLM (vLLM) validates student outputs and provides:
+Teacher LLM (Ollama Mixtral) validates student outputs and provides:
 - Corrected actions (environment-compatible)
 - Process penalties (format/strategy quality)
 - Critiques (for learning feedback)
@@ -15,9 +15,9 @@
 from models import Observation, Action, Position
 from config import ActionType, ResourceType, ShipComponent, MapLevel
 
-# vLLM Teacher API Configuration
-VLLM_API_URL = "http://localhost:8001/v1/chat/completions"
-TEACHER_MODEL_NAME = "unsloth/Meta-Llama-3.1-8B-Instruct"
+# Ollama Teacher API Configuration
+OLLAMA_API_URL = "http://localhost:11434/api/chat"
+TEACHER_MODEL_NAME = "mixtral:8x22b"
 
 
 # ============================================================================
@@ -966,7 +966,7 @@ def teacher_validate_student_output(
     sailor_id: str
 ) -> Dict[str, Any]:
     """
-    Send student LLM output to teacher (vLLM) for validation and correction.
+    Send student LLM output to teacher (Ollama Mixtral) for validation and correction.
     
     This is the CORE of process reward modeling:
     - Student generates potentially malformed output
@@ -996,25 +996,28 @@ def teacher_validate_student_output(
 GAME STATE:
 {full_observation_text}"""
 
-    # Query vLLM teacher API
+    # Query Ollama teacher API
     payload = {
         "model": TEACHER_MODEL_NAME,
         "messages": [
             {"role": "system", "content": TEACHER_SYSTEM_PROMPT},
             {"role": "user", "content": user_prompt}
         ],
-        "max_tokens": 200,
-        "temperature": 0.1,
-        "top_p": 1.0,
+        "stream": False,
+        "options": {
+            "temperature": 0.1,
+            "top_p": 1.0,
+            "num_predict": 200
+        }
     }
 
     try:
-        response = requests.post(VLLM_API_URL, json=payload, timeout=15)
+        response = requests.post(OLLAMA_API_URL, json=payload, timeout=30)
         response.raise_for_status()
         data = response.json()
-        teacher_response = data["choices"][0]["message"]["content"].strip()
+        teacher_response = data["message"]["content"].strip()
     except requests.exceptions.RequestException as e:
-        # Fallback if vLLM server unreachable
+        # Fallback if Ollama server unreachable
         print(f"⚠️  Teacher API error: {e}")
         teacher_response = f"VALID: NO\nACTION: WAIT\nPENALTY: -2.0\nCRITIQUE: Teacher API unavailable - defaulting to WAIT"
 
 
@@ -52,23 +52,28 @@
    "source": [
     "## ⚙️ Prerequisites\n",
     "\n",
-    "**Start vLLM teacher server in a separate terminal:**\n",
+    "**Ensure Ollama teacher server is running:**\n",
     "\n",
     "```bash\n",
-    "pip install vllm\n",
+    "# Check if Ollama is running\n",
+    "sudo systemctl status ollama\n",
     "\n",
-    "vllm serve unsloth/Meta-Llama-3.1-8B-Instruct \\\n",
-    "    --dtype bfloat16 \\\n",
-    "    --max-model-len 8192 \\\n",
-    "    --port 8000\n",
+    "# If not running, start it\n",
+    "sudo systemctl start ollama\n",
+    "\n",
+    "# Pull Mixtral model if not already available\n",
+    "ollama pull mixtral:8x22b\n",
+    "\n",
+    "# Verify it's available\n",
+    "ollama list\n",
     "```\n",
     "\n",
-    "**Verify:**\n",
+    "**Verify the server is reachable and the model is listed:**\n",
     "```bash\n",
-    "curl http://localhost:8000/v1/models\n",
+    "curl http://localhost:11434/api/tags\n",
     "```\n",
     "\n",
-    "Expected: `{\"data\": [{\"id\": \"unsloth/Meta-Llama-3.1-8B-Instruct\", ...}]}`\n",
+    "Expected: JSON response listing `mixtral:8x22b` in the models array.\n",
     "\n",
     "---"
    ]
@@ -310,7 +315,7 @@
    "id": "88bdad8b",
    "metadata": {},
    "source": [
-    "## 4️⃣ Verify vLLM Teacher Server"
+    "## 4️⃣ Verify Ollama Teacher Server"
    ]
   },
   {
@@ -332,23 +337,32 @@
    "source": [
     "import requests\n",
     "\n",
-    "VLLM_API_URL = \"http://localhost:8001/v1/chat/completions\"\n",
-    "VLLM_MODELS_URL = \"http://localhost:8001/v1/models\"\n",
+    "OLLAMA_API_URL = \"http://localhost:11434/api/chat\"\n",
+    "OLLAMA_TAGS_URL = \"http://localhost:11434/api/tags\"\n",
     "\n",
-    "print(\"Checking vLLM teacher server...\")\n",
+    "print(\"Checking Ollama teacher server...\")\n",
     "try:\n",
-    "    response = requests.get(VLLM_MODELS_URL, timeout=5)\n",
+    "    response = requests.get(OLLAMA_TAGS_URL, timeout=5)\n",
     "    if response.status_code == 200:\n",
     "        models = response.json()\n",
-    "        print(f\"✅ vLLM server running!\")\n",
-    "        print(f\"   Model: {[m['id'] for m in models.get('data', [])]}\")\n",
+    "        model_names = [m['name'] for m in models.get('models', [])]\n",
+    "        print(f\"✅ Ollama server running!\")\n",
+    "        print(f\"   Available models: {model_names}\")\n",
+    "        \n",
+    "        if 'mixtral:8x22b' in model_names:\n",
+    "            print(f\"   ✅ Mixtral model ready for training\")\n",
+    "        else:\n",
+    "            print(f\"   ⚠️  Mixtral model not found!\")\n",
+    "            print(f\"   Run: ollama pull mixtral:8x22b\")\n",
     "    else:\n",
     "        print(f\"⚠️  Server responded with status {response.status_code}\")\n",
     "except requests.exceptions.RequestException as e:\n",
-    "    print(f\"❌ vLLM server not reachable!\")\n",
+    "    print(f\"❌ Ollama server not reachable!\")\n",
     "    print(f\"   Error: {e}\")\n",
-    "    print(f\"\\n   Start server in separate terminal:\")\n",
-    "    print(f\"   vllm serve unsloth/Meta-Llama-3.1-8B-Instruct --dtype bfloat16 --port 8001\")\n",
+    "    print(f\"\\n   Start server:\")\n",
+    "    print(f\"   sudo systemctl start ollama\")\n",
+    "    print(f\"\\n   Pull model:\")\n",
+    "    print(f\"   ollama pull mixtral:8x22b\")\n",
     "    raise SystemExit(\"Teacher server required for training\")\n"
    ]
   },
@@ -357,7 +371,7 @@
    "id": "2c07d318",
    "metadata": {},
    "source": [
-    "## 5️⃣ Test Teacher Validation"
+    "## 5️⃣ Test Teacher Validation (Ollama Mixtral)"
    ]
   },
   {
-Original file line number
+Diff line change
@@ -1,5 +1,6 @@
 # Python
 .venv/
 +myenv/
 venv/
 __pycache__/
 *.pyc