Commit bae649a

Merge pull request #29 from apicalshark/1: title
2 parents 5535c9f + 78156d7, commit bae649a


62 files changed: +5845 / -28222 lines (large commit; only a subset of the file diffs is shown below)

.devops/full.Dockerfile
Lines changed: 22 additions & 9 deletions

```diff
@@ -3,23 +3,36 @@ ARG UBUNTU_VERSION=22.04
 FROM ubuntu:$UBUNTU_VERSION AS build
 
 RUN apt-get update && \
-    apt-get install -y build-essential python3 python3-pip git libcurl4-openssl-dev libgomp1
+    apt-get install -y build-essential git cmake libcurl4-openssl-dev
+
+WORKDIR /app
+
+COPY . .
 
-COPY requirements.txt requirements.txt
-COPY requirements requirements
+RUN cmake -S . -B build -DGGML_BACKEND_DL=ON -DGGML_NATIVE=OFF -DGGML_CPU_ALL_VARIANTS=ON -DLLAMA_CURL=ON -DCMAKE_BUILD_TYPE=Release && \
+    cmake --build build -j $(nproc) && \
+    mkdir -p /app/lib && \
+    find build -name "*.so" -exec cp {} /app/lib/ \;
 
-RUN pip install --upgrade pip setuptools wheel \
-    && pip install -r requirements.txt
+FROM ubuntu:$UBUNTU_VERSION as runtime
 
 WORKDIR /app
 
-COPY . .
+RUN apt-get update && \
+    apt-get install -y build-essential python3 python3-pip git libcurl4-openssl-dev libgomp1
 
-ENV LLAMA_CURL=1
+COPY requirements.txt /app/requirements.txt
+COPY requirements /app/requirements
+COPY .devops/tools.sh /app/tools.sh
 
+RUN pip install --upgrade pip setuptools wheel && \
+    pip install -r /app/requirements.txt
 
-RUN make -j$(nproc)
+COPY --from=build /app/build/bin/ /app/
+COPY --from=build /app/lib/ /app/
+COPY --from=build /app/convert_hf_to_gguf.py /app/
+COPY --from=build /app/gguf-py /app/gguf-py
 
 ENV LC_ALL=C.utf8
 
-ENTRYPOINT ["/app/.devops/tools.sh"]
+ENTRYPOINT ["/app/tools.sh"]
```

.devops/llama-cli.Dockerfile
Lines changed: 11 additions & 5 deletions

```diff
@@ -3,21 +3,27 @@ ARG UBUNTU_VERSION=22.04
 FROM ubuntu:$UBUNTU_VERSION AS build
 
 RUN apt-get update && \
-    apt-get install -y build-essential git
+    apt-get install -y build-essential git cmake libcurl4-openssl-dev
 
 WORKDIR /app
 
 COPY . .
 
-RUN make -j$(nproc) llama-cli
+RUN cmake -S . -B build -DGGML_BACKEND_DL=ON -DGGML_NATIVE=OFF -DGGML_CPU_ALL_VARIANTS=ON -DLLAMA_CURL=ON -DCMAKE_BUILD_TYPE=Release && \
+    cmake --build build -j $(nproc) && \
+    mkdir -p /app/lib && \
+    find build -name "*.so" -exec cp {} /app/lib/ \;
 
 FROM ubuntu:$UBUNTU_VERSION AS runtime
 
+WORKDIR /app
+
 RUN apt-get update && \
-    apt-get install -y libgomp1
+    apt-get install -y libcurl4-openssl-dev libgomp1 curl
 
-COPY --from=build /app/llama-cli /llama-cli
+COPY --from=build /app/build/bin/llama-cli /app/
+COPY --from=build /app/lib/ /app/
 
 ENV LC_ALL=C.utf8
 
-ENTRYPOINT [ "/llama-cli" ]
+ENTRYPOINT [ "/app/llama-cli" ]
```

.devops/llama-server.Dockerfile
Lines changed: 7 additions & 15 deletions

```diff
@@ -9,33 +9,25 @@ WORKDIR /app
 
 COPY . .
 
-
-RUN \
-    # Build multiple versions of the CPU backend
-    scripts/build-cpu.sh avx -DGGML_AVX=ON -DGGML_AVX2=OFF && \
-    scripts/build-cpu.sh avx2 -DGGML_AVX=ON -DGGML_AVX2=ON && \
-    scripts/build-cpu.sh avx512 -DGGML_AVX=ON -DGGML_AVX2=ON -DGGML_AVX512=ON && \
-    scripts/build-cpu.sh amx -DGGML_AVX=ON -DGGML_AVX2=ON -DGGML_AVX512=ON -DGGML_AVX_VNNI=ON -DGGML_AVX512_VNNI=ON -DGGML_AMX_TILE=ON -DGGML_AMX_INT8=ON && \
-    # Build llama-server
-    cmake -S . -B build -DGGML_BACKEND_DL=ON -DGGML_NATIVE=OFF -DLLAMA_CURL=ON -DCMAKE_BUILD_TYPE=Release && \
-    cmake --build build --target llama-server -j $(nproc) && \
-    # Copy the built libraries to /app/lib
+RUN cmake -S . -B build -DGGML_BACKEND_DL=ON -DGGML_NATIVE=OFF -DGGML_CPU_ALL_VARIANTS=ON -DLLAMA_CURL=ON -DCMAKE_BUILD_TYPE=Release && \
+    cmake --build build -j $(nproc) && \
     mkdir -p /app/lib && \
-    mv libggml-cpu* /app/lib/ && \
     find build -name "*.so" -exec cp {} /app/lib/ \;
 
 FROM ubuntu:$UBUNTU_VERSION AS runtime
 
+WORKDIR /app
+
 RUN apt-get update && \
     apt-get install -y libcurl4-openssl-dev libgomp1 curl
 
-COPY --from=build /app/build/bin/llama-server /llama-server
-COPY --from=build /app/lib/ /
+COPY --from=build /app/build/bin/llama-server /app/
+COPY --from=build /app/lib/ /app/
 
 ENV LC_ALL=C.utf8
 # Must be set to 0.0.0.0 so it can listen to requests from host machine
 ENV LLAMA_ARG_HOST=0.0.0.0
 
 HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
 
-ENTRYPOINT [ "/llama-server" ]
+ENTRYPOINT [ "/app/llama-server" ]
```

.github/workflows/server.yml
Lines changed: 16 additions & 10 deletions

```diff
@@ -76,20 +76,26 @@ jobs:
         run: |
           pip install -r examples/server/tests/requirements.txt
 
-      - name: Verify server deps
-        id: verify_server_deps
+      # Setup nodejs (to be used for verifying bundled index.html)
+      - uses: actions/setup-node@v4
+        with:
+          node-version: 22
+
+      - name: Verify bundled index.html
+        id: verify_server_index_html
         run: |
           git config --global --add safe.directory $(realpath .)
-          cd examples/server
-          git ls-files --others --modified
+          cd examples/server/webui
           git status
-          ./deps.sh
+          npm ci
+          npm run build
           git status
-          not_ignored_files="$(git ls-files --others --modified)"
-          echo "Modified files: ${not_ignored_files}"
-          if [ -n "${not_ignored_files}" ]; then
-            echo "Repository is dirty or server deps are not built as expected"
-            echo "${not_ignored_files}"
+          modified_files="$(git status -s)"
+          echo "Modified files: ${modified_files}"
+          if [ -n "${modified_files}" ]; then
+            echo "Repository is dirty or server/webui is not built as expected"
+            echo "Hint: You may need to follow Web UI build guide in server/README.md"
+            echo "${modified_files}"
             exit 1
           fi
```
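The CI step now rebuilds the npm-based web UI instead of running `deps.sh`, then fails if the working tree changed. The check can be reproduced locally before pushing; a minimal sketch, assuming Node 22 (as pinned by the workflow) and a clean working tree:

```bash
cd examples/server/webui
npm ci          # install pinned dependencies
npm run build   # regenerate the bundled index.html
git status -s   # any output here means the bundle is stale and CI would fail
```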

.gitignore
Lines changed: 4 additions & 0 deletions

```diff
@@ -104,6 +104,10 @@ examples/server/*.mjs.hpp
 !examples/sycl/*.bat
 !examples/sycl/*.sh
 
+# Server Web UI temporary files
+node_modules
+examples/server/webui/dist
+
 # Python
 
 /.venv
```

Makefile
Lines changed: 10 additions & 9 deletions

```diff
@@ -1145,8 +1145,15 @@ $(LIB_COMMON_S): $(OBJ_COMMON)
 # Include dependency files
 -include $(DEP_FILES)
 
+# Clean generated server assets
+clean-server-assets:
+	find examples/server -type f -name "*.js.hpp" -delete
+	find examples/server -type f -name "*.mjs.hpp" -delete
+	find examples/server -type f -name "*.css.hpp" -delete
+	find examples/server -type f -name "*.html.hpp" -delete
+
 # Clean rule
-clean:
+clean: clean-server-assets
 	rm -vrf $(BUILD_TARGETS) $(TEST_TARGETS)
 	rm -rvf *.a *.dll *.so *.dot
 	find ggml src common tests examples pocs -type f -name "*.o" -delete
@@ -1354,20 +1361,14 @@ llama-server: \
 	examples/server/utils.hpp \
 	examples/server/httplib.h \
 	examples/server/index.html.hpp \
-	examples/server/completion.js.hpp \
 	examples/server/loading.html.hpp \
-	examples/server/deps_daisyui.min.css.hpp \
-	examples/server/deps_markdown-it.js.hpp \
-	examples/server/deps_tailwindcss.js.hpp \
-	examples/server/deps_vue.esm-browser.js.hpp \
 	common/json.hpp \
-	common/stb_image.h \
 	$(OBJ_ALL)
 	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h %.hpp $<,$^) -Iexamples/server $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) $(LWINSOCK2)
 
 # Portable equivalent of `cd examples/server/public && xxd -i $(notdir $<) ../$(notdir $<).hpp`:
-examples/server/%.hpp: examples/server/public/% Makefile
+examples/server/%.hpp: examples/server/public/% FORCE Makefile
 	@( export NAME=$(subst .,_,$(subst -,_,$(notdir $<))) && \
 	echo "unsigned char $${NAME}[] = {" && \
 	cat $< | od -v -t x1 -An | sed -E 's/([0-9a-fA-F]+)/0x\1, /g' && \
@@ -1542,7 +1543,7 @@ llama-q8dot: pocs/vdot/q8dot.cpp ggml/src/ggml.o \
 # Deprecated binaries that we want to keep around long enough for people to migrate to the new filenames, then these can be removed.
 #
 # Mark legacy binary targets as .PHONY so that they are always checked.
-.PHONY: main quantize perplexity embedding server
+.PHONY: FORCE main quantize perplexity embedding server
 
 # Define the object file target
 examples/deprecation-warning/deprecation-warning.o: examples/deprecation-warning/deprecation-warning.cpp
```
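Adding `FORCE` (declared `.PHONY`) as a prerequisite makes the generated `examples/server/*.hpp` assets rebuild on every `make` invocation, and `clean` now chains through the new `clean-server-assets` target. Usage sketch, using only the targets added here:

```bash
# Remove only the generated server assets
make clean-server-assets

# Full clean now also removes them via the clean-server-assets dependency
make clean
```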

convert_hf_to_gguf.py
Lines changed: 33 additions & 22 deletions

```diff
@@ -1831,29 +1831,40 @@ class MiniCPMModel(Model):
     model_arch = gguf.MODEL_ARCH.MINICPM
 
     def set_gguf_parameters(self):
-        block_count = self.hparams["num_hidden_layers"]
-        self.gguf_writer.add_context_length(self.hparams["max_position_embeddings"])
-        self.gguf_writer.add_embedding_length(self.hparams["hidden_size"])
-        self.gguf_writer.add_block_count(block_count)
-        self.gguf_writer.add_feed_forward_length(self.hparams["intermediate_size"])
-        self.gguf_writer.add_rope_dimension_count(self.hparams["hidden_size"] // self.hparams["num_attention_heads"])
-        self.gguf_writer.add_head_count(self.hparams["num_attention_heads"])
-        self.gguf_writer.add_head_count_kv(self.hparams["num_key_value_heads"])
-        self.gguf_writer.add_layer_norm_rms_eps(self.hparams["rms_norm_eps"])
-        self.gguf_writer.add_file_type(self.ftype)
+        super().set_gguf_parameters()
+        embedding_scale = float(self.hparams["scale_emb"])
+        self.gguf_writer.add_embedding_scale(embedding_scale)
+        logger.info(f"gguf: (minicpm) embedding_scale = {embedding_scale}")
+        residual_scale = self.hparams["scale_depth"] / self.hparams["num_hidden_layers"] ** 0.5
+        self.gguf_writer.add_residual_scale(residual_scale)
+        logger.info(f"gguf: (minicpm) residual_scale = {residual_scale}")
+        logit_scale = self.hparams["hidden_size"] / self.hparams["dim_model_base"]
+        self.gguf_writer.add_logit_scale(logit_scale)
+        logger.info(f"gguf: (minicpm) logit_scale = {logit_scale}")
+        if self.hparams.get("rope_scaling") is not None:
+            if self.hparams["rope_scaling"].get("type") == "longrope":
+                self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.LONGROPE)
+                logger.info(f"gguf: (minicpm) rope_scaling_type = {gguf.RopeScalingType.LONGROPE}")
 
-    def set_vocab(self):
-        self._set_vocab_llama_hf()
+    def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:
+        rope_dims = self.hparams["hidden_size"] // self.hparams["num_attention_heads"]
 
-    def _reverse_hf_permute(self, weights: Tensor, n_head: int, n_kv_head: int | None = None) -> Tensor:
-        if n_kv_head is not None and n_head != n_kv_head:
-            n_head //= n_kv_head
+        rope_scaling = self.find_hparam(['rope_scaling'], True)
+        if rope_scaling is not None:
+            long_factors = rope_scaling.get('long_factor', None)
+            short_factors = rope_scaling.get('short_factor', None)
 
-        return (
-            weights.reshape(n_head, 2, weights.shape[0] // n_head // 2, *weights.shape[1:])
-            .swapaxes(1, 2)
-            .reshape(weights.shape)
-        )
+            if long_factors is None or short_factors is None:
+                raise KeyError('Missing the required key rope_scaling.long_factor or rope_scaling_short_factor')
+
+            if len(long_factors) != len(short_factors) or len(long_factors) != rope_dims / 2:
+                raise ValueError(f'The length of rope long and short factors must be {rope_dims / 2}')
+
+            yield (self.format_tensor_name(gguf.MODEL_TENSOR.ROPE_FACTORS_LONG), torch.tensor(long_factors, dtype=torch.float32))
+            yield (self.format_tensor_name(gguf.MODEL_TENSOR.ROPE_FACTORS_SHORT), torch.tensor(short_factors, dtype=torch.float32))
+
+    def set_vocab(self):
+        self._set_vocab_sentencepiece()
 
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
         del bid  # unused
@@ -1863,9 +1874,9 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
 
         # HF models permute some of the tensors, so we need to undo that
         if name.endswith(("q_proj.weight")):
-            data_torch = self._reverse_hf_permute(data_torch, n_head, n_head)
+            data_torch = LlamaModel.permute(data_torch, n_head, n_head)
         if name.endswith(("k_proj.weight")):
-            data_torch = self._reverse_hf_permute(data_torch, n_head, n_kv_head)
+            data_torch = LlamaModel.permute(data_torch, n_head, n_kv_head)
 
         return [(self.map_tensor_name(name), data_torch)]
```
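The MiniCPM converter now defers common parameters to the base class, derives MiniCPM's embedding/residual/logit scales from `hparams`, switches to the SentencePiece vocab path, and emits long/short RoPE factor tensors for longrope checkpoints. A sketch of exercising the updated path (the model directory and output name are placeholders):

```bash
# Convert a local MiniCPM HF checkpoint with the updated converter
python3 convert_hf_to_gguf.py /path/to/MiniCPM-checkpoint \
    --outfile minicpm.gguf --outtype f16
```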

convert_hf_to_gguf_update.py
Lines changed: 1 addition & 1 deletion

```diff
@@ -17,7 +17,7 @@
 #
 # python3 convert_hf_to_gguf_update.py <huggingface_token>
 #
-# - Copy-paste the generated get_vocab_base_pre() function into convert_hf_to_gguf.py
+# - The convert_hf_to_gguf.py script will have had its get_vocab_base_pre() function updated
 # - Update llama.cpp with the new pre-tokenizer if necessary
 #
 # TODO: generate tokenizer tests for llama.cpp
```

docs/build.md
Lines changed: 13 additions & 8 deletions

```diff
@@ -26,19 +26,24 @@ cmake --build build --config Release
 
 1. Single-config generators (e.g. default = `Unix Makefiles`; note that they just ignore the `--config` flag):
 
-```bash
-cmake -B build -DCMAKE_BUILD_TYPE=Debug
-cmake --build build
-```
+    ```bash
+    cmake -B build -DCMAKE_BUILD_TYPE=Debug
+    cmake --build build
+    ```
 
 2. Multi-config generators (`-G` param set to Visual Studio, XCode...):
 
-```bash
-cmake -B build -G "Xcode"
-cmake --build build --config Debug
-```
+    ```bash
+    cmake -B build -G "Xcode"
+    cmake --build build --config Debug
+    ```
 
 For more details and a list of supported generators, see the [CMake documentation](https://cmake.org/cmake/help/latest/manual/cmake-generators.7.html).
+- For static builds, add `-DBUILD_SHARED_LIBS=OFF`:
+  ```
+  cmake -B build -DBUILD_SHARED_LIBS=OFF
+  cmake --build build --config Release
+  ```
 
 - Building for Windows (x86, x64 and arm64) with MSVC or clang as compilers:
   - Install Visual Studio 2022, e.g. via the [Community Edition](https://visualstudio.microsoft.com/de/vs/community/). In the installer, select at least the following options (this also automatically installs the required additional tools like CMake,...):
```

examples/deprecation-warning/deprecation-warning.cpp
Lines changed: 1 addition & 1 deletion

```diff
@@ -12,7 +12,7 @@ int main(int argc, char** argv) {
     }
 
     // Get only the program name from the full path
-    auto pos = filename.find_last_of('/');
+    auto pos = filename.find_last_of("/\\");
     if (pos != std::string::npos) {
         filename = filename.substr(pos+1);
     }
```
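Searching for both separators with `find_last_of("/\\")` makes the program-name extraction work for Windows-style paths (e.g. `C:\bin\main.exe`) as well as POSIX ones.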
