forked from kvcache-ai/ktransformers
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathinstall.sh
More file actions
executable file
·388 lines (339 loc) · 13.5 KB
/
install.sh
File metadata and controls
executable file
·388 lines (339 loc) · 13.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
#!/usr/bin/env bash
set -euo pipefail
#######################################
# Print the full help text and terminate the script.
# Arguments:
#   $1 - optional exit status. Defaults to 1, which is what every existing
#        caller gets; pass 0 when help was explicitly requested.
# Outputs:
#   Help text on stdout. The heredoc delimiter is unquoted on purpose so
#   that $0 expands to the name the script was invoked with.
# Returns:
#   Never returns; exits with the chosen status.
#######################################
usage() {
  local exit_status="${1:-1}"
  cat <<EOF
Usage: $0 [SUBCOMMAND] [BUILD_OPTIONS]
Two-step installation in one file. Choose a subcommand:
SUBCOMMANDS:
deps Install system prerequisites only
build Build and install kt-kernel (no dependency install)
all Run deps then build (default when no subcommand)
-h, --help Show this help message
BUILD_OPTIONS (for "build" or "all"):
(none) Auto-detect CPU and configure automatically (recommended)
--manual Skip auto-detection, use manual configuration (see below)
--no-clean Do not delete local build/ before building (default cleans)
AUTO-DETECTION (Default):
The script will automatically detect your CPU and use ALL available features:
- CPUINFER_CPU_INSTRUCT = NATIVE (uses -march=native)
- CPUINFER_ENABLE_AMX = ON/OFF (based on detection)
- CPUINFER_ENABLE_AVX512_VNNI = ON/OFF (with fallback if OFF)
- CPUINFER_ENABLE_AVX512_BF16 = ON/OFF (with fallback if OFF)
✓ Best performance on YOUR machine
✗ Binary may NOT work on different/older CPUs
Use this when: Installing for local use only
MANUAL CONFIGURATION:
Use --manual flag when building for DISTRIBUTION or different machines.
Set these environment variables before running:
CPUINFER_CPU_INSTRUCT - Target CPU instruction set
Options: AVX512, AVX2, FANCY, NATIVE
CPUINFER_ENABLE_AMX - Enable Intel AMX support
Options: ON, OFF
Distribution examples (portable binaries):
┌──────────────────────────────────────────────────────────────────────────┐
│ Configuration │ Target CPUs │ Use Case │
├────────────────────────┼──────────────────────────┼──────────────────────┤
│ AVX512 + AMX=OFF │ Skylake-X, Ice Lake, │ General distribution │
│ │ Cascade Lake, Zen 4 │ (recommended) │
├────────────────────────┼──────────────────────────┼──────────────────────┤
│ AVX2 + AMX=OFF │ Haswell (2013) and newer │ Maximum compatibility│
├────────────────────────┼──────────────────────────┼──────────────────────┤
│ FANCY + AMX=OFF │ Ice Lake+, Zen 4+ │ Modern CPUs only │
│ │ (with full AVX512 ext) │ │
└────────────────────────┴──────────────────────────┴──────────────────────┘
Use this when: Building Docker images, PyPI packages, or deploying to clusters
Example: Build for general distribution
export CPUINFER_CPU_INSTRUCT=AVX512
export CPUINFER_ENABLE_AMX=OFF
$0 build --manual
# Result: Works on any CPU with AVX512 (2017+)
Example: Build for maximum compatibility
export CPUINFER_CPU_INSTRUCT=AVX2
export CPUINFER_ENABLE_AMX=OFF
$0 build --manual
# Result: Works on any CPU with AVX2 (2013+)
Optional variables (with defaults):
CPUINFER_BUILD_TYPE=Release Build type (Debug/RelWithDebInfo/Release)
CPUINFER_PARALLEL=8 Number of parallel build jobs
CPUINFER_VERBOSE=1 Verbose build output (0/1)
CPUINFER_ENABLE_AVX512_VNNI=ON/OFF Override VNNI detection (auto if unset)
CPUINFER_ENABLE_AVX512_BF16=ON/OFF Override BF16 detection (auto if unset)
Software Fallback Support:
✓ If VNNI not available: Uses AVX512BW fallback (2-3x slower but works)
✓ If BF16 not available: Uses AVX512F fallback (5-10x slower but works)
→ Old CPUs with only AVX512F+BW can run all code (slower but functional)
EOF
  exit "$exit_status"
}
#######################################
# Install the system packages the build needs: cmake (through conda when
# conda is on PATH) plus the hwloc development files and pkg-config through
# the distribution's package manager.
# Globals:
#   EUID (read) - decides whether package commands are prefixed with sudo.
# Arguments:
#   None
# Outputs:
#   Progress messages on stdout, warnings on stderr.
# Returns:
#   0 when the OS cannot be identified or is unsupported (a warning is
#   printed); otherwise the status of the last package-manager command.
#######################################
install_dependencies() {
  echo "Checking and installing system dependencies..."

  # Privilege prefix for package-manager calls; empty when already root.
  local sudo_cmd=""
  if [[ "${EUID:-0}" -ne 0 ]]; then
    if command -v sudo &> /dev/null; then
      sudo_cmd="sudo"
    else
      echo "Warning: Not running as root and sudo not found. Package installation may fail." >&2
      echo "Please run as root or install sudo." >&2
    fi
  fi

  if command -v conda &> /dev/null; then
    echo "Installing cmake via conda..."
    conda install -y cmake
  else
    echo "Warning: conda not found. Skipping cmake installation via conda." >&2
    echo "Please install conda or manually install cmake." >&2
  fi

  # Detect OS type. os-release is sourced inside a command substitution so
  # the variables it defines (ID, NAME, VERSION, ...) cannot overwrite this
  # script's globals, and a missing ID does not trip `set -u`.
  local os_id=""
  if [[ -f /etc/os-release ]]; then
    # shellcheck disable=SC1091
    os_id=$(. /etc/os-release && printf '%s' "${ID:-}") || os_id=""
  fi
  if [[ -z "$os_id" ]]; then
    if [[ -f /etc/debian_version ]]; then
      os_id="debian"
    elif [[ -f /etc/redhat-release ]]; then
      os_id="rhel"
    else
      echo "Warning: Unable to detect OS type. Skipping dependency installation." >&2
      return 0
    fi
  fi

  # Install dependencies based on OS.
  # ${sudo_cmd:+"$sudo_cmd"} expands to nothing at all when no prefix is
  # needed, so the package manager itself becomes the command word.
  case "$os_id" in
    debian|ubuntu|linuxmint|pop)
      echo "Detected Debian-based system. Installing libhwloc-dev and pkg-config..."
      # apt-get rather than apt: apt's command line is not a stable
      # scripting interface.
      ${sudo_cmd:+"$sudo_cmd"} apt-get update
      ${sudo_cmd:+"$sudo_cmd"} apt-get install -y libhwloc-dev pkg-config
      ;;
    fedora|rhel|centos|rocky|almalinux)
      echo "Detected Red Hat-based system. Installing hwloc-devel and pkgconfig..."
      # Prefer dnf; fall back to yum only where dnf is not installed.
      if command -v dnf &> /dev/null; then
        ${sudo_cmd:+"$sudo_cmd"} dnf install -y hwloc-devel pkgconfig
      else
        ${sudo_cmd:+"$sudo_cmd"} yum install -y hwloc-devel pkgconfig
      fi
      ;;
    arch|manjaro)
      echo "Detected Arch-based system. Installing hwloc and pkgconf..."
      ${sudo_cmd:+"$sudo_cmd"} pacman -S --noconfirm hwloc pkgconf
      ;;
    opensuse*|sles)
      echo "Detected openSUSE-based system. Installing hwloc-devel and pkg-config..."
      ${sudo_cmd:+"$sudo_cmd"} zypper install -y hwloc-devel pkg-config
      ;;
    *)
      echo "Warning: Unsupported OS '$os_id'. Please manually install libhwloc-dev and pkg-config." >&2
      ;;
  esac
}
#######################################
# Detect AMX / AVX512-VNNI / AVX512-BF16 support from a cpuinfo file.
# Arguments:
#   $1 - optional path to a cpuinfo-style file (default: /proc/cpuinfo).
# Outputs:
#   "has_amx has_avx512_vnni has_avx512_bf16" on stdout, each 0 or 1.
# Returns:
#   0 always. A missing file (e.g. macOS) or a file without a "flags" line
#   (e.g. ARM Linux) reports "0 0 0" instead of failing under
#   `set -euo pipefail`.
#######################################
detect_cpu_features() {
  local cpuinfo="${1:-/proc/cpuinfo}"
  local has_amx=0
  local has_avx512_vnni=0
  local has_avx512_bf16=0
  local flags=""

  # Regexes live in variables so the alternation is parsed by =~ reliably.
  local amx_re='amx_tile|amx_int8|amx_bf16'
  local vnni_re='avx512_vnni|avx512vnni'
  local bf16_re='avx512_bf16|avx512bf16'

  if [[ -f "$cpuinfo" ]]; then
    # Only the first "flags" line is inspected. grep exits non-zero when
    # there is none; treat that as "no features".
    flags=$(grep -m1 '^flags' "$cpuinfo") || flags=""

    # In-shell matching avoids `echo | grep -q`, whose early exit can be
    # reported as a pipeline failure under pipefail.
    if [[ "$flags" =~ $amx_re ]]; then
      has_amx=1
    fi
    if [[ "$flags" =~ $vnni_re ]]; then
      has_avx512_vnni=1
    fi
    if [[ "$flags" =~ $bf16_re ]]; then
      has_avx512_bf16=1
    fi
  fi

  echo "$has_amx $has_avx512_vnni $has_avx512_bf16"
}
#######################################
# Configure the CPUINFER_* build environment and install the package that
# lives next to this script with `pip install . -v`.
# Globals:
#   CPUINFER_CPU_INSTRUCT, CPUINFER_ENABLE_AMX
#       exported in auto mode; must already be set in --manual mode.
#   CPUINFER_ENABLE_AVX512_VNNI, CPUINFER_ENABLE_AVX512_BF16
#       exported in auto mode unless the user already set them.
#   CPUINFER_BUILD_TYPE, CPUINFER_PARALLEL, CPUINFER_VERBOSE
#       exported with defaults Release / 8 / 1 when unset.
# Arguments:
#   --manual    validate the user-provided configuration instead of
#               auto-detecting the CPU
#   --no-clean  keep an existing build/ directory
#   -h, --help  print the help text and exit 0
# Outputs:
#   Progress and configuration summary on stdout; errors and warnings on
#   stderr.
# Returns:
#   Status of pip. Exits 1 on an invalid manual configuration or when the
#   user declines the NATIVE + AMX=OFF confirmation prompt.
#######################################
build_step() {
  # Parse build-only flags from arguments to this function
  local manual_mode=0
  local clean_build=1
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --manual)
        manual_mode=1
        shift
        ;;
      --no-clean)
        clean_build=0
        shift
        ;;
      -h|--help)
        # usage terminates the shell it runs in with a failure status, so
        # run it in a subshell and report success for an explicit request.
        (usage) || true
        exit 0
        ;;
      *)
        # Anything else has never been interpreted; say so rather than
        # dropping it silently (a mistyped flag would otherwise go unseen).
        echo "Warning: ignoring unrecognized build argument(s): $*" >&2
        break
        ;;
    esac
  done

  # Clean local build directory to ensure a fresh CMake/configure
  local repo_root
  repo_root="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
  if [[ "$clean_build" -eq 1 ]]; then
    if [[ -d "$repo_root/build" ]]; then
      echo "Cleaning previous build directory: $repo_root/build"
      # :? aborts instead of touching /build should repo_root ever be empty.
      rm -rf -- "${repo_root:?}/build"
    fi
  else
    echo "Skipping clean of $repo_root/build (requested by --no-clean)"
  fi

  # Detection results stay local so they do not leak into the environment.
  local cpu_features=""
  local has_amx=""
  local has_avx512_vnni=""
  local has_avx512_bf16=""

  if [[ "$manual_mode" -eq 0 ]]; then
    # Auto-detection mode
    echo "=========================================="
    echo "Auto-detecting CPU capabilities..."
    echo "=========================================="
    echo ""

    # detect_cpu_features prints "has_amx has_avx512_vnni has_avx512_bf16"
    cpu_features=$(detect_cpu_features)
    read -r has_amx has_avx512_vnni has_avx512_bf16 <<< "$cpu_features"

    export CPUINFER_CPU_INSTRUCT=NATIVE
    if [[ "$has_amx" == "1" ]]; then
      echo "✓ AMX instructions detected"
      export CPUINFER_ENABLE_AMX=ON
      echo ""
      echo "Configuration: NATIVE + AMX=ON"
      echo " ✓ Best performance on this machine"
      echo " ✗ Binary requires Sapphire Rapids or newer CPU"
    else
      echo "ℹ AMX instructions not detected"
      export CPUINFER_ENABLE_AMX=OFF
      echo ""
      echo "Configuration: NATIVE + AMX=OFF"
      echo " ✓ Using AVX512/AVX2 instructions"
    fi
    echo ""
    echo " ⚠️ IMPORTANT: This binary is optimized for THIS CPU only"
    echo " To build portable binaries for distribution, use:"
    echo " export CPUINFER_CPU_INSTRUCT=AVX512 # or AVX2"
    echo " export CPUINFER_ENABLE_AMX=OFF"
    echo " ./install.sh build --manual"

    # Fine-grained AVX512 subset detection (with fallback support)
    echo ""
    echo "AVX512 Feature Detection:"

    # VNNI: a value already present in the environment wins over detection
    if [[ -n "${CPUINFER_ENABLE_AVX512_VNNI:-}" ]]; then
      echo " VNNI: User override = $CPUINFER_ENABLE_AVX512_VNNI"
    elif [[ "$has_avx512_vnni" == "1" ]]; then
      echo " VNNI: ✓ Detected (hardware acceleration enabled)"
      export CPUINFER_ENABLE_AVX512_VNNI=ON
    else
      echo " VNNI: ✗ Not detected (will use software fallback, 2-3x slower)"
      export CPUINFER_ENABLE_AVX512_VNNI=OFF
    fi

    # BF16: a value already present in the environment wins over detection
    if [[ -n "${CPUINFER_ENABLE_AVX512_BF16:-}" ]]; then
      echo " BF16: User override = $CPUINFER_ENABLE_AVX512_BF16"
    elif [[ "$has_avx512_bf16" == "1" ]]; then
      echo " BF16: ✓ Detected (hardware acceleration enabled)"
      export CPUINFER_ENABLE_AVX512_BF16=ON
    else
      echo " BF16: ✗ Not detected (will use software fallback, 5-10x slower)"
      export CPUINFER_ENABLE_AVX512_BF16=OFF
    fi

    echo ""
    echo " Note: Software fallbacks ensure all code works on older CPUs"
    echo " Tip: Override with CPUINFER_ENABLE_AVX512_VNNI/BF16=ON/OFF"
    echo ""
    echo "To use manual configuration instead, run: $0 build --manual"
    echo ""
  else
    # Manual mode - validate user configuration (no exports).
    # The ${VAR:-} form keeps `set -u` from aborting with "unbound variable"
    # before the explanatory message below can be printed.
    if [[ -z "${CPUINFER_CPU_INSTRUCT:-}" || -z "${CPUINFER_ENABLE_AMX:-}" ]]; then
      echo "Error: Manual mode requires CPUINFER_CPU_INSTRUCT and CPUINFER_ENABLE_AMX to be set." >&2
      echo ""
      usage
    fi

    # Validate CPUINFER_CPU_INSTRUCT
    case "$CPUINFER_CPU_INSTRUCT" in
      NATIVE|FANCY|AVX512|AVX2)
        ;;
      *)
        echo "Error: Invalid CPUINFER_CPU_INSTRUCT='$CPUINFER_CPU_INSTRUCT'" >&2
        echo "Must be one of: NATIVE, FANCY, AVX512, AVX2" >&2
        exit 1
        ;;
    esac

    # Validate CPUINFER_ENABLE_AMX
    case "$CPUINFER_ENABLE_AMX" in
      ON|OFF)
        ;;
      *)
        echo "Error: Invalid CPUINFER_ENABLE_AMX='$CPUINFER_ENABLE_AMX'" >&2
        echo "Must be either: ON or OFF" >&2
        exit 1
        ;;
    esac

    # Warn about problematic configuration
    if [[ "$CPUINFER_CPU_INSTRUCT" == "NATIVE" && "$CPUINFER_ENABLE_AMX" == "OFF" ]]; then
      cpu_features=$(detect_cpu_features)
      has_amx="${cpu_features%% *}" # first space-separated field
      if [[ "$has_amx" == "1" ]]; then
        echo "=========================================="
        echo "⚠️ WARNING: Risky Configuration"
        echo "=========================================="
        echo ""
        echo "Your configuration:"
        echo " CPUINFER_CPU_INSTRUCT = NATIVE"
        echo " CPUINFER_ENABLE_AMX = OFF"
        echo ""
        echo "Your CPU HAS AMX support!"
        echo ""
        echo "Problem:"
        echo " • NATIVE uses -march=native which auto-enables ALL CPU features"
        echo " • This may IGNORE your AMX=OFF setting"
        echo " • The binary may still contain AMX instructions"
        echo ""
        echo "Recommended fixes:"
        echo " 1) For portable build (recommended for distribution):"
        echo " export CPUINFER_CPU_INSTRUCT=AVX512"
        echo ""
        echo " 2) If you want best performance on this CPU:"
        echo " export CPUINFER_ENABLE_AMX=ON"
        echo ""
        # EOF on stdin (non-interactive run) counts as "no" instead of
        # letting `set -e` kill the script inside read.
        local reply=""
        read -r -n 1 -p "Continue with risky configuration? (y/N) " reply || reply=""
        echo
        if [[ ! "$reply" =~ ^[Yy]$ ]]; then
          exit 1
        fi
      fi
    fi
  fi

  # Set defaults for optional variables
  export CPUINFER_BUILD_TYPE="${CPUINFER_BUILD_TYPE:-Release}"
  export CPUINFER_PARALLEL="${CPUINFER_PARALLEL:-8}"
  export CPUINFER_VERBOSE="${CPUINFER_VERBOSE:-1}"

  echo "=========================================="
  echo "Building kt-kernel with configuration:"
  echo "=========================================="
  echo " CPUINFER_CPU_INSTRUCT = $CPUINFER_CPU_INSTRUCT"
  echo " CPUINFER_ENABLE_AMX = $CPUINFER_ENABLE_AMX"
  echo " CPUINFER_ENABLE_AVX512_VNNI = ${CPUINFER_ENABLE_AVX512_VNNI:-AUTO}"
  echo " CPUINFER_ENABLE_AVX512_BF16 = ${CPUINFER_ENABLE_AVX512_BF16:-AUTO}"
  echo " CPUINFER_BUILD_TYPE = $CPUINFER_BUILD_TYPE"
  echo " CPUINFER_PARALLEL = $CPUINFER_PARALLEL"
  echo ""

  # Build from the script's own directory (the same tree whose build/ was
  # handled above) even when the script was started from somewhere else.
  (cd "$repo_root" && pip install . -v)
}
#######################################
# Subcommand dispatcher.
# Arguments:
#   $1 - optional subcommand: deps | build | all. With no arguments the
#        subcommand is "all". Any other first argument is left in place and
#        the whole argument list is handed to build_step (backward
#        compatibility: flags-only invocation means "build").
#   -h, --help as first argument prints the help text and exits 0.
#   Remaining arguments are forwarded to build_step for "build" and "all".
#######################################
main() {
  local subcmd="all"
  if [[ $# -gt 0 ]]; then
    case "$1" in
      deps|build|all)
        subcmd="$1"
        shift
        ;;
      -h|--help)
        # usage terminates the shell it runs in with a failure status; run
        # it in a subshell so an explicit help request reports success.
        (usage) || true
        exit 0
        ;;
      *)
        subcmd="build" # backward compatibility: flags-only => build
        ;;
    esac
  fi

  case "$subcmd" in
    deps)
      install_dependencies
      ;;
    build)
      build_step "$@"
      ;;
    all)
      install_dependencies
      build_step "$@"
      ;;
  esac
}

main "$@"