
Commit e063bce

Merge branch 'ggerganov:master' into master
2 parents: a7812d9 + d5ab297


60 files changed (+7697 additions, -2095 deletions)

.github/ISSUE_TEMPLATE/bug.md

Lines changed: 2 additions & 0 deletions
@@ -7,3 +7,5 @@ assignees: ''
 ---
 
 Please include information about your system, the steps to reproduce the bug, and the version of llama.cpp that you are using. If possible, please provide a minimal code example that reproduces the bug.
+
+If the bug concerns the server, please try to reproduce it first using the [server test scenario framework](https://github.com/ggerganov/llama.cpp/tree/master/examples/server/tests).

.github/workflows/build.yml

Lines changed: 1 addition & 2 deletions
@@ -669,8 +669,7 @@ jobs:
       run: |
         cd examples/llama.android
 
-        # Skip armeabi-v7a for now (https://github.com/llvm/llvm-project/issues/65820).
-        ./gradlew build --no-daemon -Pskip-armeabi-v7a
+        ./gradlew build --no-daemon
 
 #  freeBSD-latest:
 #    runs-on: macos-12

.github/workflows/server.yml

Lines changed: 83 additions & 0 deletions
@@ -0,0 +1,83 @@
+# Server build and tests
+name: Server
+
+on:
+  workflow_dispatch: # allows manual triggering
+  push:
+    branches:
+      - master
+    paths: ['.github/workflows/server.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'examples/server/tests/**.*']
+  pull_request:
+    types: [opened, synchronize, reopened]
+    paths: ['.github/workflows/server.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'examples/server/tests/**.*']
+
+jobs:
+  server:
+    runs-on: ubuntu-latest
+
+    strategy:
+      matrix:
+        sanitizer: [ADDRESS, THREAD, UNDEFINED]
+        build_type: [Debug, Release]
+        include:
+          - build_type: Release
+            sanitizer: ""
+        exclude:
+          - build_type: Release
+            sanitizer: ADDRESS
+          - build_type: Release
+            sanitizer: THREAD
+          - build_type: Release
+            sanitizer: UNDEFINED
+
+    container:
+      image: ubuntu:latest
+      ports:
+        - 8888
+      options: --cpus 4
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v3
+
+      - name: Dependencies
+        id: depends
+        run: |
+          apt-get update
+          apt-get -y install \
+            build-essential \
+            git \
+            cmake \
+            python3-pip \
+            wget \
+            psmisc
+
+      - name: Build
+        id: cmake_build
+        run: |
+          mkdir build
+          cd build
+          cmake .. \
+            -DLLAMA_NATIVE=OFF \
+            -DLLAMA_BUILD_SERVER=ON \
+            -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \
+            -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON ;
+          cmake --build . --config ${{ matrix.build_type }} -j $(nproc) --target server
+
+      - name: Tests dependencies
+        id: test_dependencies
+        run: |
+          pip install -r examples/server/tests/requirements.txt
+
+      - name: Download models
+        id: download_models
+        run: |
+          cd examples/server/tests
+          ../../../scripts/hf.sh --repo ggml-org/models --file tinyllamas/stories260K.gguf
+
+      - name: Tests
+        id: server_integration_test
+        run: |
+          cd examples/server/tests
+          PORT=8888 ./tests.sh
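The workflow steps above can also be reproduced locally. A minimal sketch, assuming a Linux shell at the repository root with cmake and pip available; the commands are taken from the workflow itself, and the sanitizer flag may be dropped for a plain build:

    # Build only the server target, mirroring the cmake_build step
    mkdir build && cd build
    cmake .. -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DCMAKE_BUILD_TYPE=Debug -DLLAMA_SANITIZE_ADDRESS=ON
    cmake --build . --config Debug -j $(nproc) --target server
    cd ..

    # Install the test dependencies and fetch the tiny test model
    pip install -r examples/server/tests/requirements.txt
    cd examples/server/tests
    ../../../scripts/hf.sh --repo ggml-org/models --file tinyllamas/stories260K.gguf

    # Run the integration suite against a local server instance
    PORT=8888 ./tests.sh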

CMakeLists.txt

Lines changed: 8 additions & 2 deletions
@@ -936,10 +936,16 @@ if (CMAKE_OSX_ARCHITECTURES STREQUAL "arm64" OR CMAKE_GENERATOR_PLATFORM_LWR STR
         list(APPEND ARCH_FLAGS -mfpu=neon-fp-armv8 -mno-unaligned-access)
     endif()
     if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv7")
-        # Raspberry Pi 2
-        list(APPEND ARCH_FLAGS -mfpu=neon-fp-armv8 -mno-unaligned-access -funsafe-math-optimizations)
+        if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Android")
+            # Android armeabi-v7a
+            list(APPEND ARCH_FLAGS -mfpu=neon-vfpv4 -mno-unaligned-access -funsafe-math-optimizations)
+        else()
+            # Raspberry Pi 2
+            list(APPEND ARCH_FLAGS -mfpu=neon-fp-armv8 -mno-unaligned-access -funsafe-math-optimizations)
+        endif()
     endif()
     if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv8")
+        # Android arm64-v8a
         # Raspberry Pi 3, 4, Zero 2 (32-bit)
         list(APPEND ARCH_FLAGS -mno-unaligned-access)
     endif()
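For reference, the new "Android" branch is taken when configuring with the Android NDK toolchain file, which sets CMAKE_SYSTEM_NAME to "Android". A configure sketch, assuming the NDK location is exported in an ANDROID_NDK variable (the variable name and API level here are illustrative):

    # 32-bit Android build; the NDK toolchain file sets CMAKE_SYSTEM_NAME
    # to "Android", so the -mfpu=neon-vfpv4 branch above is selected
    cmake .. \
      -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake \
      -DANDROID_ABI=armeabi-v7a \
      -DANDROID_PLATFORM=android-23
    cmake --build . --config Release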

Makefile

Lines changed: 8 additions & 3 deletions
@@ -381,8 +381,13 @@ ifdef LLAMA_BLIS
 endif # LLAMA_BLIS
 
 ifdef LLAMA_CUBLAS
-  MK_CPPFLAGS += -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/x86_64-linux/include -I/usr/local/cuda/targets/aarch64-linux/include
-  MK_LDFLAGS += -lcuda -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 -L/opt/cuda/lib64 -L$(CUDA_PATH)/targets/x86_64-linux/lib -L/usr/local/cuda/targets/aarch64-linux/lib -L/usr/lib/wsl/lib
+  ifneq ('', '$(wildcard /opt/cuda)')
+    CUDA_PATH ?= /opt/cuda
+  else
+    CUDA_PATH ?= /usr/local/cuda
+  endif
+  MK_CPPFLAGS += -DGGML_USE_CUBLAS -I$(CUDA_PATH)/include -I$(CUDA_PATH)/targets/$(UNAME_M)-linux/include
+  MK_LDFLAGS += -lcuda -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L$(CUDA_PATH)/lib64 -L/usr/lib64 -L$(CUDA_PATH)/targets/$(UNAME_M)-linux/lib -L/usr/lib/wsl/lib
   OBJS += ggml-cuda.o
   MK_NVCCFLAGS += -use_fast_math
 ifdef LLAMA_FATAL_WARNINGS

@@ -597,7 +602,7 @@ $(info I CC: $(shell $(CC) --version | head -n 1))
 $(info I CXX: $(shell $(CXX) --version | head -n 1))
 ifdef LLAMA_CUBLAS
 $(info I NVCC: $(shell $(NVCC) --version | tail -n 1))
-CUDA_VERSION := $(shell nvcc --version | grep -oP 'release (\K[0-9]+\.[0-9])')
+CUDA_VERSION := $(shell $(NVCC) --version | grep -oP 'release (\K[0-9]+\.[0-9])')
 ifeq ($(shell awk -v "v=$(CUDA_VERSION)" 'BEGIN { print (v < 11.7) }'),1)
 ifndef CUDA_DOCKER_ARCH
 ifndef CUDA_POWER_ARCH
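Because CUDA_PATH is now assigned with ?=, a value already set in the environment or on the make command line takes precedence over the auto-detected /opt/cuda or /usr/local/cuda. A sketch of selecting a specific toolkit (the versioned path is illustrative):

    # Point the cuBLAS build at a specific CUDA toolkit installation
    make LLAMA_CUBLAS=1 CUDA_PATH=/usr/local/cuda-12.2 -j $(nproc)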

README.md

Lines changed: 6 additions & 1 deletion
@@ -107,13 +107,16 @@ Typically finetunes of the base models below are supported as well.
 
 **Multimodal models:**
 
-- [x] [LLaVA 1.5 models](https://huggingface.co/collections/liuhaotian/llava-15-653aac15d994e992e2677a7e)
+- [x] [LLaVA 1.5 models](https://huggingface.co/collections/liuhaotian/llava-15-653aac15d994e992e2677a7e), [LLaVA 1.6 models](https://huggingface.co/collections/liuhaotian/llava-16-65b9e40155f60fd046a5ccf2)
 - [x] [BakLLaVA](https://huggingface.co/models?search=SkunkworksAI/Bakllava)
 - [x] [Obsidian](https://huggingface.co/NousResearch/Obsidian-3B-V0.5)
 - [x] [ShareGPT4V](https://huggingface.co/models?search=Lin-Chen/ShareGPT4V)
 - [x] [MobileVLM 1.7B/3B models](https://huggingface.co/models?search=mobileVLM)
 - [x] [Yi-VL](https://huggingface.co/models?search=Yi-VL)
 
+**HTTP server**
+
+[llama.cpp web server](./examples/server) is a lightweight [OpenAI API](https://github.com/openai/openai-openapi) compatible HTTP server that can be used to serve local models and easily connect them to existing clients.
 
 **Bindings:**
 

@@ -155,6 +158,8 @@ Unless otherwise noted these projects are open-source with permissive licensing:
 - [semperai/amica](https://github.com/semperai/amica)
 - [withcatai/catai](https://github.com/withcatai/catai)
 - [Mobile-Artificial-Intelligence/maid](https://github.com/Mobile-Artificial-Intelligence/maid) (MIT)
+- [Msty](https://msty.app) (proprietary)
+- [LLMFarm](https://github.com/guinmoon/LLMFarm?tab=readme-ov-file) (MIT)
 
 ---
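To illustrate the OpenAI API compatibility noted above, a sketch of querying a locally built server; the model path, port, and request body are illustrative, and the endpoint follows the OpenAI chat completions convention:

    # Start the server with a local GGUF model (path is illustrative)
    ./server -m models/llama-2-7b.Q4_K_M.gguf --port 8080

    # Send an OpenAI-style chat completion request
    curl http://localhost:8080/v1/chat/completions \
      -H "Content-Type: application/json" \
      -d '{"messages": [{"role": "user", "content": "Hello!"}]}'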

awq-py/README.md

Lines changed: 0 additions & 116 deletions
This file was deleted.
