-
-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathalice.service
More file actions
69 lines (54 loc) · 1.78 KB
/
alice.service
File metadata and controls
69 lines (54 loc) · 1.78 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
[Unit]
Description=ALICE - Remote Stable Diffusion Service
Documentation=https://github.com/fewtarius/alice
# Wants= only pulls network-online.target into the boot transaction; it does
# not impose any ordering. The matching After= entry is what actually delays
# this service until the network is reported as up.
Wants=network-online.target
After=network.target network-online.target
# Hard dependency on the mount unit for /var/lib/alice (mount unit names are
# the escaped mount path). Requires= makes this service fail to start if the
# mount cannot be activated; After= makes it wait until the mount is in place.
Requires=var-lib-alice.mount
After=var-lib-alice.mount
[Service]
# Runs the ALICE Python application as the unprivileged "alice" user, with
# GPU-related environment, a post-start liveness probe, automatic restart,
# filesystem sandboxing, and file-based logging.
# Type=simple: the process started by ExecStart is the main service process.
Type=simple
User=alice
Group=alice
# ExecStart runs "python -m src.main" from this directory.
# NOTE(review): presumably the "src" package lives at /opt/alice/src - confirm.
WorkingDirectory=/opt/alice
# Environment configuration
# NOTE(review): ALICE_CONFIG is presumably read by the application to locate
# its config file - confirm against the application code.
Environment="ALICE_CONFIG=/etc/alice/config.yaml"
# Unbuffered Python stdout/stderr so output reaches the log file promptly.
Environment="PYTHONUNBUFFERED=1"
# For NVIDIA GPUs
# Note: the NVIDIA and AMD variables below are all set unconditionally.
Environment="CUDA_VISIBLE_DEVICES=0"
# For AMD GPUs (ROCm)
Environment="HIP_VISIBLE_DEVICES=0"
# NOTE(review): presumably overrides the GPU architecture reported to ROCm
# because gfx1103 is not an officially supported target - confirm.
Environment="HSA_OVERRIDE_GFX_VERSION=11.0.0"
# PyTorch HIP memory management - reduce fragmentation for SDXL
Environment="PYTORCH_HIP_ALLOC_CONF=expandable_segments:True"
# For AMD Phoenix APU (gfx1103) - CRITICAL for preventing GPU hangs
# MIOPEN_DEBUG_FIND_ALL=0 disables MIOpen solver search that causes GPU hangs
Environment="MIOPEN_DEBUG_FIND_ALL=0"
Environment="PYTORCH_ROCM_ARCH=gfx1103"
# Start command
ExecStart=/opt/alice/venv/bin/python -m src.main
# Graceful reload - sends SIGHUP for config reload
# $MAINPID is substituted by systemd with the PID of the main process.
ExecReload=/bin/kill -HUP $MAINPID
# Health check before marking service as started
# Polls http://localhost:8090/livez once per second, for at most 120 attempts.
# Exits 0 on the first successful response; exits 1 if none succeeds, which
# fails the start of the unit. curl is called without a per-request timeout,
# so a stalled request can stretch the wait beyond ~120s; TimeoutStartSec
# below is the hard upper bound.
ExecStartPost=/bin/sh -c 'for i in $(seq 1 120); do curl -sf http://localhost:8090/livez >/dev/null 2>&1 && exit 0; sleep 1; done; exit 1'
# Restart configuration
# Restart on any exit, waiting 10s between attempts.
Restart=always
RestartSec=10
# Start-up (ExecStart plus the ExecStartPost probe) may take up to 300s;
# stopping may take up to 60s before systemd forcibly terminates the service.
TimeoutStartSec=300
TimeoutStopSec=60
# Systemd watchdog (optional - requires app support)
# WatchdogSec=60
# Security hardening
NoNewPrivileges=yes
PrivateTmp=yes
# ProtectSystem=strict makes the whole file system hierarchy read-only for
# this service; ReadWritePaths below re-enables writes to the listed paths.
ProtectSystem=strict
ProtectHome=yes
# NOTE(review): /etc/alice is writable here - confirm the service really needs
# to write to its configuration directory.
ReadWritePaths=/opt/alice /var/lib/alice /var/log/alice /etc/alice
# Resource limits
LimitNOFILE=65536
LimitNPROC=4096
# Logging
# stdout and stderr are both appended to the same file. The "append:" form
# requires systemd 240 or newer.
StandardOutput=append:/var/log/alice/alice.log
StandardError=append:/var/log/alice/alice.log
[Install]
# "systemctl enable" hooks this unit into multi-user.target, so it is started
# as part of a normal (non-graphical) multi-user boot.
WantedBy=multi-user.target