
Commit e5a93bd

Merge branch 'ggml-org:master' into master
2 parents: 0b7d564 + aa9538a

2 files changed: +21 −67 lines


src/llama-model.cpp

Lines changed: 5 additions & 3 deletions
@@ -4825,11 +4825,13 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
                     // NextN/MTP tensors (preserved but unused) - conditionally load for last nextn_predict_layers
                     if (hparams.nextn_predict_layers > 0 && static_cast<uint32_t>(i) >= n_layer - hparams.nextn_predict_layers) {
                         layer.nextn.eh_proj          = create_tensor(tn(LLM_TENSOR_NEXTN_EH_PROJ, "weight", i), { 2 * n_embd, n_embd }, flags);
-                        layer.nextn.embed_tokens     = create_tensor(tn(LLM_TENSOR_NEXTN_EMBED_TOKENS, "weight", i), { n_embd, n_vocab }, flags);
                         layer.nextn.enorm            = create_tensor(tn(LLM_TENSOR_NEXTN_ENORM, "weight", i), { n_embd }, flags);
                         layer.nextn.hnorm            = create_tensor(tn(LLM_TENSOR_NEXTN_HNORM, "weight", i), { n_embd }, flags);
-                        layer.nextn.shared_head_head = create_tensor(tn(LLM_TENSOR_NEXTN_SHARED_HEAD_HEAD, "weight", i), { n_embd, n_vocab }, flags);
-                        layer.nextn.shared_head_norm = create_tensor(tn(LLM_TENSOR_NEXTN_SHARED_HEAD_NORM, "weight", i), { n_embd }, flags);
+
+                        // Optional tensors
+                        layer.nextn.embed_tokens     = create_tensor(tn(LLM_TENSOR_NEXTN_EMBED_TOKENS, "weight", i), { n_embd, n_vocab }, flags | TENSOR_NOT_REQUIRED);
+                        layer.nextn.shared_head_head = create_tensor(tn(LLM_TENSOR_NEXTN_SHARED_HEAD_HEAD, "weight", i), { n_embd, n_vocab }, flags | TENSOR_NOT_REQUIRED);
+                        layer.nextn.shared_head_norm = create_tensor(tn(LLM_TENSOR_NEXTN_SHARED_HEAD_NORM, "weight", i), { n_embd }, flags | TENSOR_NOT_REQUIRED);
                     }
                 }
             }
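
The switch to flags | TENSOR_NOT_REQUIRED means create_tensor() may now return a null pointer for these three tensors instead of failing the load when a GGUF file omits them, so downstream code must null-check before use. A minimal sketch of that pattern, not taken from this commit — the stub types and the fallback-head helper are assumptions for illustration only:

#include <cstdio>

// Stand-ins for the real ggml/llama types, just so the sketch compiles.
struct tensor { const char * name; };

struct nextn_tensors {
    tensor * eh_proj          = nullptr; // still required
    tensor * embed_tokens     = nullptr; // optional as of this commit
    tensor * shared_head_head = nullptr; // optional as of this commit
    tensor * shared_head_norm = nullptr; // optional as of this commit
};

// With TENSOR_NOT_REQUIRED, a missing tensor is loaded as nullptr, so a
// hypothetical consumer guards on the pointer, e.g. falling back to the
// model's main output head when the MTP head is absent:
const tensor * pick_output_head(const nextn_tensors & nextn, const tensor * model_head) {
    return nextn.shared_head_head ? nextn.shared_head_head : model_head;
}

int main() {
    tensor model_head { "output.weight" };
    nextn_tensors nextn; // shared_head_head missing -> nullptr
    std::printf("head: %s\n", pick_output_head(nextn, &model_head)->name);
}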

tools/server/webui/scripts/dev.sh

Lines changed: 16 additions & 64 deletions
@@ -1,5 +1,14 @@
 #!/bin/bash
 
+# Development script for llama.cpp webui
+#
+# This script starts the webui development servers (Storybook and Vite).
+# Note: You need to start llama-server separately.
+#
+# Usage:
+#   bash scripts/dev.sh
+#   npm run dev
+
 cd ../../../
 
 # Check and install git hooks if missing
@@ -28,76 +37,19 @@ check_and_install_hooks() {
 # Install git hooks if needed
 check_and_install_hooks
 
-# Check if llama-server binary already exists
-if [ ! -f "build/bin/llama-server" ]; then
-    echo "Building llama-server..."
-    cmake -B build && cmake --build build --config Release -t llama-server
-else
-    echo "llama-server binary already exists, skipping build."
-fi
-
-# Start llama-server and capture output
-echo "Starting llama-server..."
-mkfifo server_output.pipe
-build/bin/llama-server -hf ggml-org/gpt-oss-20b-GGUF --jinja -c 0 --no-webui > server_output.pipe 2>&1 &
-SERVER_PID=$!
-
-# Function to wait for server to be ready
-wait_for_server() {
-    echo "Waiting for llama-server to be ready..."
-    local max_wait=60
-    local start_time=$(date +%s)
-
-    # Read server output in background and look for the ready message
-    (
-        while IFS= read -r line; do
-            echo "🔍 Server: $line"
-            if [[ "$line" == *"server is listening on http://127.0.0.1:8080 - starting the main loop"* ]]; then
-                echo "✅ llama-server is ready!"
-                echo "READY" > server_ready.flag
-                break
-            fi
-        done < server_output.pipe
-    ) &
-
-    # Wait for ready flag or timeout
-    while [ ! -f server_ready.flag ]; do
-        local current_time=$(date +%s)
-        local elapsed=$((current_time - start_time))
-
-        if [ $elapsed -ge $max_wait ]; then
-            echo "❌ Server failed to start within $max_wait seconds"
-            rm -f server_ready.flag
-            return 1
-        fi
-
-        sleep 1
-    done
-
-    rm -f server_ready.flag
-    return 0
-}
-
 # Cleanup function
 cleanup() {
     echo "🧹 Cleaning up..."
-    kill $SERVER_PID 2>/dev/null
-    rm -f server_output.pipe server_ready.flag
     exit
 }
 
 # Set up signal handlers
 trap cleanup SIGINT SIGTERM
 
-# Wait for server to be ready
-if wait_for_server; then
-    echo "🚀 Starting development servers..."
-    cd tools/server/webui
-    storybook dev -p 6006 --ci & vite dev --host 0.0.0.0 &
-
-    # Wait for all background processes
-    wait
-else
-    echo "❌ Failed to start development environment"
-    cleanup
-fi
+echo "🚀 Starting development servers..."
+echo "📝 Note: Make sure to start llama-server separately if needed"
+cd tools/server/webui
+storybook dev -p 6006 --ci & vite dev --host 0.0.0.0 &
+
+# Wait for all background processes
+wait
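
With the auto-build and auto-start logic removed, llama-server now has to be built and launched by hand in a separate terminal. A sketch of the manual equivalent, reusing the build target and flags from the removed code — the gpt-oss-20b model is simply the one the old script happened to use:

#!/bin/bash
# Build the llama-server target once (same command the removed code ran):
cmake -B build && cmake --build build --config Release -t llama-server

# Run it in its own terminal; --no-webui (carried over from the removed
# command) disables the server's bundled UI so the webui dev servers
# started by dev.sh can act as the frontend:
build/bin/llama-server -hf ggml-org/gpt-oss-20b-GGUF --jinja -c 0 --no-webui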
