@@ -985,11 +985,38 @@ start_and_show() {
985985 echo " "
986986 bash " $IBEX_DIR /scripts/launchd-service.sh" install
987987
988- # ── Verify MCP servers are healthy ──────────────────────── ──
988+ # ── Verify MCP servers are healthy (with full auto-diagnostics) ──
989989 echo " "
990990 echo " Checking MCP server health..."
991991 echo " "
992992
993+ # Pre-flight: check node is available
994+ local node_path
995+ node_path=$( which node 2> /dev/null)
996+ if [ -z " $node_path " ]; then
997+ for p in /opt/homebrew/bin/node /usr/local/bin/node; do
998+ [ -x " $p " ] && node_path=" $p " && break
999+ done
1000+ fi
1001+ if [ -z " $node_path " ]; then
1002+ printf " ${RED} ✗ Node.js is not installed!${NC} \n"
1003+ printf " MCP servers require Node.js. Install it:\n"
1004+ printf " brew install node\n"
1005+ printf " Then re-run the installer.\n\n"
1006+ return 1
1007+ fi
1008+ printf " ${GREEN} ✓${NC} Node.js: $node_path ($( ${node_path} --version) )\n"
1009+
1010+ # Pre-flight: check node_modules exist
1011+ if [ ! -d " $IBEX_DIR /servers/node_modules" ]; then
1012+ printf " ${RED} ✗ node_modules missing!${NC} \n"
1013+ printf " Running npm install...\n"
1014+ (cd " $IBEX_DIR /servers" && npm install --production 2>&1 ) | tail -3
1015+ else
1016+ printf " ${GREEN} ✓${NC} node_modules present\n"
1017+ fi
1018+
1019+ echo " "
9931020 local servers_ok=0 servers_fail=0
9941021
9951022 for port in 3001 3002 3003 3005 3006; do
@@ -998,14 +1025,17 @@ start_and_show() {
9981025 3001) sname=" Slack" ;; 3002) sname=" Notion" ;; 3003) sname=" Jira" ;;
9991026 3005) sname=" ServiceNow" ;; 3006) sname=" Percona Docs" ;;
10001027 esac
1028+ local sname_lower
1029+ sname_lower=$( echo " $sname " | tr ' [:upper:]' ' [:lower:]' )
1030+
10011031 # Check if this server is configured (has a launchd plist)
10021032 if ! ls ~ /Library/LaunchAgents/com.ibex.mcp-* .plist 2> /dev/null | xargs grep -l " \" $port \" " > /dev/null 2>&1 ; then
10031033 continue # not configured, skip
10041034 fi
10051035
1006- # Wait up to 5 seconds for the server to respond
1036+ # Wait up to 8 seconds for the server to respond
10071037 local healthy=false
1008- for i in 1 2 3 4 5; do
1038+ for i in 1 2 3 4 5 6 7 8 ; do
10091039 if curl -sf --connect-timeout 1 " http://localhost:${port} /health" > /dev/null 2>&1 ; then
10101040 healthy=true
10111041 break
@@ -1018,22 +1048,97 @@ start_and_show() {
10181048 servers_ok=$(( servers_ok + 1 ))
10191049 else
10201050 printf " ${RED} ✗${NC} %s server (port %s) — NOT responding\n" " $sname " " $port "
1021- # Show actual error from logs to help diagnose
1022- local sname_lower=$( echo " $sname " | tr ' [:upper:]' ' [:lower:]' )
1051+
1052+ # ── Auto-diagnose why ──
1053+ local label=" com.ibex.mcp.${sname_lower} "
10231054 local errlog=" $HOME /.ibex-logs/${sname_lower} .err"
10241055 local outlog=" $HOME /.ibex-logs/${sname_lower} .log"
1025- if [ -f " $errlog " ] && [ -s " $errlog " ] ; then
1026- printf " Last error: %s\n " " $( tail -1 " $errlog " ) "
1027- elif [ -f " $outlog " ] && [ -s " $outlog " ] ; then
1028- printf " Last log: %s\n " " $( tail -1 " $outlog " ) "
1056+
1057+ # 1. Is launchd tracking it?
1058+ if ! launchctl list 2> /dev/null | grep -q " $label " ; then
1059+ printf " ${YELLOW} ⚠ ${NC} launchd service not loaded\n "
10291060 else
1030- printf " No logs found — launchd may not have started the service\n"
1061+ local lpid
1062+ lpid=$( launchctl list 2> /dev/null | grep " $label " | awk ' {print $1}' )
1063+ if [ " $lpid " = " -" ] || [ -z " $lpid " ]; then
1064+ printf " ${YELLOW} ⚠${NC} launchd loaded but process not running (crashed)\n"
1065+ else
1066+ printf " ${YELLOW} ⚠${NC} process running (PID $lpid ) but port not responding\n"
1067+ fi
10311068 fi
1032- # Check if launchd knows about it
1033- local label=" com.ibex.mcp.${sname_lower} "
1034- if ! launchctl list 2> /dev/null | grep -q " $label " ; then
1035- printf " launchd service not loaded — re-run: ~/IBEX/scripts/launchd-service.sh install\n"
1069+
1070+ # 2. Is something else using the port?
1071+ local port_owner
1072+ port_owner=$( lsof -ti:${port} 2> /dev/null | head -1)
1073+ if [ -n " $port_owner " ]; then
1074+ local port_cmd
1075+ port_cmd=$( ps -p " $port_owner " -o comm= 2> /dev/null)
1076+ printf " ${YELLOW} ⚠${NC} port %s in use by PID %s (%s)\n" " $port " " $port_owner " " $port_cmd "
1077+ fi
1078+
1079+ # 3. Show last meaningful log lines
1080+ if [ -f " $errlog " ] && [ -s " $errlog " ]; then
1081+ # Filter out startup messages, show actual errors
1082+ local real_errors
1083+ real_errors=$( grep -v " Streamable HTTP on" " $errlog " | tail -3)
1084+ if [ -n " $real_errors " ]; then
1085+ printf " ${YELLOW} ⚠${NC} error log:\n"
1086+ echo " $real_errors " | while read -r line; do
1087+ printf " %s\n" " $line "
1088+ done
1089+ fi
1090+ fi
1091+ if [ -f " $outlog " ] && [ -s " $outlog " ]; then
1092+ local real_out
1093+ real_out=$( grep -iE " error|fatal|EADDRINUSE|EACCES|MODULE_NOT_FOUND|Cannot find" " $outlog " | tail -3)
1094+ if [ -n " $real_out " ]; then
1095+ printf " ${YELLOW} ⚠${NC} stdout errors:\n"
1096+ echo " $real_out " | while read -r line; do
1097+ printf " %s\n" " $line "
1098+ done
1099+ fi
1100+ fi
1101+
1102+ # 4. No logs at all?
1103+ if [ ! -s " $errlog " ] && [ ! -s " $outlog " ]; then
1104+ printf " ${YELLOW} ⚠${NC} no logs found — server never started\n"
1105+ printf " check plist: cat ~/Library/LaunchAgents/${label} .plist\n"
10361106 fi
1107+
1108+ # 5. Auto-recovery: try killing port + direct start
1109+ printf " ${YELLOW} →${NC} attempting recovery...\n"
1110+ lsof -ti:${port} 2> /dev/null | xargs kill -9 2> /dev/null || true
1111+ sleep 1
1112+
1113+ # Find the server script for this port
1114+ local server_script=" "
1115+ case $port in
1116+ 3001) server_script=" servers/slack.js" ;;
1117+ 3002) server_script=" servers/notion.js" ;;
1118+ 3003) server_script=" servers/jira.js" ;;
1119+ 3005) server_script=" servers/servicenow.js" ;;
1120+ 3006) server_script=" servers/percona-docs.js" ;;
1121+ esac
1122+
1123+ if [ -n " $server_script " ] && [ -f " $IBEX_DIR /$server_script " ]; then
1124+ # Source env and start directly
1125+ set -a; source " $HOME /.ibex-mcp.env" 2> /dev/null; set +a
1126+ nohup " $node_path " " $IBEX_DIR /$server_script " --http \
1127+ >> " $errlog " 2>&1 &
1128+ local recovery_pid=$!
1129+ sleep 3
1130+ if curl -sf --connect-timeout 2 " http://localhost:${port} /health" > /dev/null 2>&1 ; then
1131+ printf " ${GREEN} ✓${NC} recovered! running as PID %s\n" " $recovery_pid "
1132+ servers_fail=$(( servers_fail - 1 )) # undo the fail count
1133+ servers_ok=$(( servers_ok + 1 ))
1134+ else
1135+ printf " ${RED} ✗${NC} recovery failed — last 5 log lines:\n"
1136+ tail -5 " $errlog " 2> /dev/null | while read -r line; do
1137+ printf " %s\n" " $line "
1138+ done
1139+ fi
1140+ fi
1141+
10371142 servers_fail=$(( servers_fail + 1 ))
10381143 fi
10391144 done
@@ -1051,7 +1156,7 @@ start_and_show() {
10511156 printf " \n ${GREEN} ✓${NC} Docker → host networking OK\n"
10521157 else
10531158 printf " \n ${RED} ✗${NC} Docker CANNOT reach MCP servers on host\n"
1054- printf " This means Open WebUI won't be able to use any tools.\n"
1159+ printf " Open WebUI won't be able to use tools.\n"
10551160 printf " Fix: Docker Desktop → Settings → General → enable 'Allow host networking'\n"
10561161 printf " Then re-run this installer.\n"
10571162 fi
@@ -1060,7 +1165,10 @@ start_and_show() {
10601165
10611166 if [ " $servers_fail " -gt 0 ]; then
10621167 printf " \n ${YELLOW} !${NC} %s server(s) failed health check — tools may not work\n" " $servers_fail "
1063- printf " Run: ~/IBEX/scripts/launchd-service.sh status\n"
1168+ printf " Full diagnostics: ~/IBEX/scripts/launchd-service.sh status\n"
1169+ printf " Logs: cat ~/.ibex-logs/*.err\n"
1170+ else
1171+ printf " \n ${GREEN} ✓${NC} All %s server(s) healthy\n" " $servers_ok "
10641172 fi
10651173
10661174 # Configure models with system prompt and tools
0 commit comments