Skip to content

Commit 61a0f12

Browse files
committed
debugging
1 parent c581897 commit 61a0f12

File tree

9 files changed

+474
-32
lines changed

9 files changed

+474
-32
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
# Python
22
.venv/
3+
myenv/
34
venv/
45
__pycache__/
56
*.pyc

=3.4.0

Lines changed: 424 additions & 0 deletions
Large diffs are not rendered by default.
-8 Bytes
Binary file not shown.
65 Bytes
Binary file not shown.
105 Bytes
Binary file not shown.
10 Bytes
Binary file not shown.
272 Bytes
Binary file not shown.

marooned_env/llm_interface.py

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
==============================================================
44
Convert observations to prompts and validate student LLM outputs using a teacher LLM.
55
6-
Teacher LLM (vLLM) validates student outputs and provides:
6+
Teacher LLM (Ollama Mixtral) validates student outputs and provides:
77
- Corrected actions (environment-compatible)
88
- Process penalties (format/strategy quality)
99
- Critiques (for learning feedback)
@@ -15,9 +15,9 @@
1515
from models import Observation, Action, Position
1616
from config import ActionType, ResourceType, ShipComponent, MapLevel
1717

18-
# vLLM Teacher API Configuration
19-
VLLM_API_URL = "http://localhost:8001/v1/chat/completions"
20-
TEACHER_MODEL_NAME = "unsloth/Meta-Llama-3.1-8B-Instruct"
18+
# Ollama Teacher API Configuration
19+
OLLAMA_API_URL = "http://localhost:11434/api/chat"
20+
TEACHER_MODEL_NAME = "mixtral:8x22b"
2121

2222

2323
# ============================================================================
@@ -966,7 +966,7 @@ def teacher_validate_student_output(
966966
sailor_id: str
967967
) -> Dict[str, Any]:
968968
"""
969-
Send student LLM output to teacher (vLLM) for validation and correction.
969+
Send student LLM output to teacher (Ollama Mixtral) for validation and correction.
970970
971971
This is the CORE of process reward modeling:
972972
- Student generates potentially malformed output
@@ -996,25 +996,28 @@ def teacher_validate_student_output(
996996
GAME STATE:
997997
{full_observation_text}"""
998998

999-
# Query vLLM teacher API
999+
# Query Ollama teacher API
10001000
payload = {
10011001
"model": TEACHER_MODEL_NAME,
10021002
"messages": [
10031003
{"role": "system", "content": TEACHER_SYSTEM_PROMPT},
10041004
{"role": "user", "content": user_prompt}
10051005
],
1006-
"max_tokens": 200,
1007-
"temperature": 0.1,
1008-
"top_p": 1.0,
1006+
"stream": False,
1007+
"options": {
1008+
"temperature": 0.1,
1009+
"top_p": 1.0,
1010+
"num_predict": 200
1011+
}
10091012
}
10101013

10111014
try:
1012-
response = requests.post(VLLM_API_URL, json=payload, timeout=15)
1015+
response = requests.post(OLLAMA_API_URL, json=payload, timeout=30)
10131016
response.raise_for_status()
10141017
data = response.json()
1015-
teacher_response = data["choices"][0]["message"]["content"].strip()
1018+
teacher_response = data["message"]["content"].strip()
10161019
except requests.exceptions.RequestException as e:
1017-
# Fallback if vLLM server unreachable
1020+
# Fallback if Ollama server unreachable
10181021
print(f"⚠️ Teacher API error: {e}")
10191022
teacher_response = f"VALID: NO\nACTION: WAIT\nPENALTY: -2.0\nCRITIQUE: Teacher API unavailable - defaulting to WAIT"
10201023

notebooks/Train_Marooned_RL_Clean.ipynb

Lines changed: 34 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -52,23 +52,28 @@
5252
"source": [
5353
"## ⚙️ Prerequisites\n",
5454
"\n",
55-
"**Start vLLM teacher server in a separate terminal:**\n",
55+
"**Ensure Ollama teacher server is running:**\n",
5656
"\n",
5757
"```bash\n",
58-
"pip install vllm\n",
58+
"# Check if Ollama is running\n",
59+
"sudo systemctl status ollama\n",
5960
"\n",
60-
"vllm serve unsloth/Meta-Llama-3.1-8B-Instruct \\\n",
61-
" --dtype bfloat16 \\\n",
62-
" --max-model-len 8192 \\\n",
63-
" --port 8000\n",
61+
"# If not running, start it\n",
62+
"sudo systemctl start ollama\n",
63+
"\n",
64+
"# Pull Mixtral model if not already available\n",
65+
"ollama pull mixtral:8x22b\n",
66+
"\n",
67+
"# Verify it's available\n",
68+
"ollama list\n",
6469
"```\n",
6570
"\n",
66-
"**Verify:**\n",
71+
"**Verify the server is reachable and the model is installed:**\n",
6772
"```bash\n",
68-
"curl http://localhost:8000/v1/models\n",
73+
"curl http://localhost:11434/api/tags\n",
6974
"```\n",
7075
"\n",
71-
"Expected: `{\"data\": [{\"id\": \"unsloth/Meta-Llama-3.1-8B-Instruct\", ...}]}`\n",
76+
"Expected: a JSON response whose `models` array contains an entry named `mixtral:8x22b`.\n",
7277
"\n",
7378
"---"
7479
]
@@ -310,7 +315,7 @@
310315
"id": "88bdad8b",
311316
"metadata": {},
312317
"source": [
313-
"## 4️⃣ Verify vLLM Teacher Server"
318+
"## 4️⃣ Verify Ollama Teacher Server"
314319
]
315320
},
316321
{
@@ -332,23 +337,32 @@
332337
"source": [
333338
"import requests\n",
334339
"\n",
335-
"VLLM_API_URL = \"http://localhost:8001/v1/chat/completions\"\n",
336-
"VLLM_MODELS_URL = \"http://localhost:8001/v1/models\"\n",
340+
"OLLAMA_API_URL = \"http://localhost:11434/api/chat\"\n",
341+
"OLLAMA_TAGS_URL = \"http://localhost:11434/api/tags\"\n",
337342
"\n",
338-
"print(\"Checking vLLM teacher server...\")\n",
343+
"print(\"Checking Ollama teacher server...\")\n",
339344
"try:\n",
340-
" response = requests.get(VLLM_MODELS_URL, timeout=5)\n",
345+
" response = requests.get(OLLAMA_TAGS_URL, timeout=5)\n",
341346
" if response.status_code == 200:\n",
342347
" models = response.json()\n",
343-
" print(f\"✅ vLLM server running!\")\n",
344-
" print(f\" Model: {[m['id'] for m in models.get('data', [])]}\")\n",
348+
" model_names = [m['name'] for m in models.get('models', [])]\n",
349+
" print(f\"✅ Ollama server running!\")\n",
350+
" print(f\" Available models: {model_names}\")\n",
351+
" \n",
352+
" if 'mixtral:8x22b' in model_names:\n",
353+
" print(f\" ✅ Mixtral model ready for training\")\n",
354+
" else:\n",
355+
" print(f\" ⚠️ Mixtral model not found!\")\n",
356+
" print(f\" Run: ollama pull mixtral:8x22b\")\n",
345357
" else:\n",
346358
" print(f\"⚠️ Server responded with status {response.status_code}\")\n",
347359
"except requests.exceptions.RequestException as e:\n",
348-
" print(f\"vLLM server not reachable!\")\n",
360+
" print(f\"Ollama server not reachable!\")\n",
349361
" print(f\" Error: {e}\")\n",
350-
" print(f\"\\n Start server in separate terminal:\")\n",
351-
" print(f\" vllm serve unsloth/Meta-Llama-3.1-8B-Instruct --dtype bfloat16 --port 8001\")\n",
362+
" print(f\"\\n Start server:\")\n",
363+
" print(f\" sudo systemctl start ollama\")\n",
364+
" print(f\"\\n Pull model:\")\n",
365+
" print(f\" ollama pull mixtral:8x22b\")\n",
352366
" raise SystemExit(\"Teacher server required for training\")\n"
353367
]
354368
},
@@ -357,7 +371,7 @@
357371
"id": "2c07d318",
358372
"metadata": {},
359373
"source": [
360-
"## 5️⃣ Test Teacher Validation"
374+
"## 5️⃣ Test Teacher Validation (Ollama Mixtral)"
361375
]
362376
},
363377
{

0 commit comments

Comments
 (0)