Skip to content

Commit fbed94d

Browse files
committed
robustness fixes and server config improvements
- add thread locks for agent_inputs and view_angles
- fix JSON error handling in state.py (return False, don't raise)
- reset damage tracking on map change
- log command handler errors to Redis
- enable F3 ready-up in server config (g_doWarmup, g_warmupReadyPercentage)

add docker health checks and resource limits
- add healthcheck.sh script that verifies plugin is running via Redis
- add health check config to docker-compose (30s interval, 60s start period)
- add 512M memory limit to quake server containers
- apply same config to all 4 parallel servers in docker-compose.multi.yml
1 parent e3efa3c commit fbed94d

File tree

9 files changed

+366
-133
lines changed

9 files changed

+366
-133
lines changed

Dockerfile.server

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,10 +51,11 @@ COPY --from=builder /build/minqlx/python/minqlx.zip /qlds/
5151
# Copy our configuration files
5252
COPY docker/server.cfg /qlds/baseq3/server.cfg
5353
COPY docker/start.sh /start.sh
54-
RUN chmod +x /start.sh
54+
COPY docker/healthcheck.sh /healthcheck.sh
55+
RUN chmod +x /start.sh /healthcheck.sh
5556

5657
# Set ownership
57-
RUN chown -R qldsuser:qldsuser /qlds /start.sh
58+
RUN chown -R qldsuser:qldsuser /qlds /start.sh /healthcheck.sh
5859

5960
# Switch to non-root user
6061
USER qldsuser

QuakeLiveInterface/connection.py

Lines changed: 34 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -350,26 +350,52 @@ def set(self, key: str, value: str, ex: Optional[int] = None) -> bool:
350350
logger.warning(f"Set failed for {key}: {e}")
351351
return False
352352

353-
def close(self):
354-
"""Close the Redis connection and stop health monitoring."""
355-
logger.info("Closing Redis connection")
353+
def close(self, timeout: float = 2.0):
354+
"""
355+
Close the Redis connection and stop health monitoring.
356+
357+
Args:
358+
timeout: Maximum time to wait for threads to stop (seconds)
359+
"""
360+
logger.info("Closing Redis connection...")
361+
362+
# Signal health monitor to stop
356363
self._health_monitor_running = False
357364

365+
# Wait for health monitor thread to finish
366+
if self._health_monitor_thread and self._health_monitor_thread.is_alive():
367+
logger.debug("Waiting for health monitor thread to stop...")
368+
self._health_monitor_thread.join(timeout=timeout)
369+
if self._health_monitor_thread.is_alive():
370+
logger.warning("Health monitor thread did not stop in time")
371+
358372
# Close all pubsub subscriptions
359-
for pubsub in list(self._pubsub_subscriptions.values()):
373+
for channel, pubsub in list(self._pubsub_subscriptions.items()):
360374
try:
375+
logger.debug(f"Closing pubsub subscription: {channel}")
361376
pubsub.close()
362-
except Exception:
363-
pass
377+
except Exception as e:
378+
logger.warning(f"Error closing pubsub {channel}: {e}")
364379
self._pubsub_subscriptions.clear()
365380

366381
# Close connection pool
367382
try:
368383
self._pool.disconnect()
369-
except Exception:
370-
pass
384+
logger.debug("Connection pool disconnected")
385+
except Exception as e:
386+
logger.warning(f"Error disconnecting pool: {e}")
371387

372388
self._healthy = False
389+
logger.info("Redis connection closed")
390+
391+
def __enter__(self):
392+
"""Context manager entry - returns self."""
393+
return self
394+
395+
def __exit__(self, exc_type, exc_val, exc_tb):
396+
"""Context manager exit - ensures cleanup on exit."""
397+
self.close()
398+
return False # Don't suppress exceptions
373399

374400

375401
class RobustPubSub:

QuakeLiveInterface/state.py

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -54,12 +54,20 @@ def __init__(self):
5454
self.agent_kills = 0
5555
self.agent_deaths = 0
5656

57-
def update_from_redis(self, redis_data: str):
57+
def update_from_redis(self, redis_data: str) -> bool:
5858
"""
5959
Updates the game state from a JSON string received from Redis.
60+
6061
Args:
6162
redis_data: A JSON string containing the game state.
63+
64+
Returns:
65+
True if update succeeded, False if data was invalid/malformed.
6266
"""
67+
if not redis_data:
68+
logger.warning("Empty redis_data received")
69+
return False
70+
6371
try:
6472
data = json.loads(redis_data)
6573

@@ -87,12 +95,17 @@ def update_from_redis(self, redis_data: str):
8795
self.map_geometry = data.get('map_geometry')
8896

8997
logger.debug("Game state updated from Redis data.")
98+
return True
99+
90100
except json.JSONDecodeError as e:
91-
logger.error(f"Error decoding JSON from Redis: {e}")
92-
raise
101+
logger.error(f"Error decoding JSON from Redis: {e}. Data preview: {redis_data[:200] if redis_data else 'None'}")
102+
return False
93103
except KeyError as e:
94104
logger.error(f"Missing key in game state data from Redis: {e}")
95-
raise
105+
return False
106+
except Exception as e:
107+
logger.error(f"Unexpected error updating game state: {e}")
108+
return False
96109

97110
def _create_player_from_data(self, player_data):
98111
weapons = [Weapon(w['name'], w['ammo']) for w in player_data.get('weapons', [])]

docker-compose.multi.yml

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,16 @@ services:
6464
- QL_AGENT_ENABLE_EVENTS=1
6565
- QL_AGENT_ENABLE_ADMIN=1
6666
- QL_AGENT_ENABLE_COMMANDS=1
67+
healthcheck:
68+
test: ["CMD", "/healthcheck.sh"]  # list form must start with CMD, CMD-SHELL or NONE
69+
interval: 30s
70+
timeout: 10s
71+
retries: 3
72+
start_period: 60s
73+
deploy:
74+
resources:
75+
limits:
76+
memory: 512M
6777
depends_on:
6878
redis:
6979
condition: service_healthy
@@ -94,6 +104,16 @@ services:
94104
- QL_AGENT_ENABLE_EVENTS=1
95105
- QL_AGENT_ENABLE_ADMIN=1
96106
- QL_AGENT_ENABLE_COMMANDS=1
107+
healthcheck:
108+
test: ["CMD", "/healthcheck.sh"]  # list form must start with CMD, CMD-SHELL or NONE
109+
interval: 30s
110+
timeout: 10s
111+
retries: 3
112+
start_period: 60s
113+
deploy:
114+
resources:
115+
limits:
116+
memory: 512M
97117
depends_on:
98118
redis:
99119
condition: service_healthy
@@ -124,6 +144,16 @@ services:
124144
- QL_AGENT_ENABLE_EVENTS=1
125145
- QL_AGENT_ENABLE_ADMIN=1
126146
- QL_AGENT_ENABLE_COMMANDS=1
147+
healthcheck:
148+
test: ["CMD", "/healthcheck.sh"]  # list form must start with CMD, CMD-SHELL or NONE
149+
interval: 30s
150+
timeout: 10s
151+
retries: 3
152+
start_period: 60s
153+
deploy:
154+
resources:
155+
limits:
156+
memory: 512M
127157
depends_on:
128158
redis:
129159
condition: service_healthy
@@ -154,6 +184,16 @@ services:
154184
- QL_AGENT_ENABLE_EVENTS=1
155185
- QL_AGENT_ENABLE_ADMIN=1
156186
- QL_AGENT_ENABLE_COMMANDS=1
187+
healthcheck:
188+
test: ["CMD", "/healthcheck.sh"]  # list form must start with CMD, CMD-SHELL or NONE
189+
interval: 30s
190+
timeout: 10s
191+
retries: 3
192+
start_period: 60s
193+
deploy:
194+
resources:
195+
limits:
196+
memory: 512M
157197
depends_on:
158198
redis:
159199
condition: service_healthy

docker-compose.yml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,16 @@ services:
5757
- QL_AGENT_ENABLE_EVENTS=1
5858
- QL_AGENT_ENABLE_ADMIN=1
5959
- QL_AGENT_ENABLE_COMMANDS=1
60+
healthcheck:
61+
test: ["CMD", "/healthcheck.sh"]
62+
interval: 30s
63+
timeout: 10s
64+
retries: 3
65+
start_period: 60s
66+
deploy:
67+
resources:
68+
limits:
69+
memory: 512M
6070
depends_on:
6171
redis:
6272
condition: service_healthy

docker/healthcheck.sh

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
#!/bin/bash
2+
# Health check script for Quake Live server
3+
# Checks whether the agent plugin is running by verifying that the Redis frame counter key exists
4+
5+
# Get the environment ID prefix (if set)
6+
PREFIX="${QLX_ENV_ID:+ql:$QLX_ENV_ID:}"
7+
PREFIX="${PREFIX:-ql:}"
8+
9+
# Check if the agent:frame key exists (its freshness is not verified here)
10+
FRAME=$(redis-cli -h ${QLX_REDISADDRESS:-redis} GET "${PREFIX}agent:frame" 2>/dev/null)
11+
12+
if [ -z "$FRAME" ]; then
13+
# No frame data yet - might still be starting up
14+
# Check if server process is running at least
15+
if pgrep -f "qzeroded" > /dev/null 2>&1; then
16+
exit 0 # Process running, give it time
17+
fi
18+
exit 1 # No process, unhealthy
19+
fi
20+
21+
# Frame counter exists, server is healthy
22+
exit 0

docker/server.cfg

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -19,12 +19,11 @@ set qlx_agentSteamId "76561197984141695"
1919
set bot_enable 1
2020
set bot_minplayers 0
2121

22-
// Duel settings
23-
set g_warmupDelay 0
24-
set g_warmupReadyPercentage 0
25-
set g_levelStartDelay 0
26-
set g_doWarmup 0
27-
set g_warmup 0
22+
// Duel settings - require F3 ready up like normal servers
23+
set g_doWarmup 1
24+
set g_warmupDelay 15
25+
set g_warmupReadyPercentage 1
26+
set g_levelStartDelay 5
2827

2928
// Disable ALL voting and map changes
3029
set g_allowVote 0

0 commit comments

Comments
 (0)