Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
43 commits
Select commit Hold shift + click to select a range
69750ef
devops: move s390x and ppc64le ci build
taronaeo Sep 10, 2025
69c7c30
devops: disable ppc64le for now since they have compiler errors
taronaeo Sep 10, 2025
ad411da
devops: stop warnings as errors
taronaeo Sep 10, 2025
db13d99
devops: switch to non-macro flag
taronaeo Sep 10, 2025
7624539
devops: going the llama macro route
taronaeo Sep 10, 2025
e9a36b2
devops: add big-endian gguf test models
taronaeo Sep 10, 2025
2d3986e
devops: disable ppc64le to test s390x, check test build
taronaeo Sep 10, 2025
efc94a0
devops: dup .gguf.inp files for big-endian tests
taronaeo Sep 10, 2025
ff929fe
devops: dup .gguf.out files for big-endian too
taronaeo Sep 10, 2025
6f896de
devops: add python setup and endian byteswap
taronaeo Sep 11, 2025
ddec010
devops: pooring thing does not have s390x python3
taronaeo Sep 11, 2025
4841b54
devops: add missing rust compiler for s390x
taronaeo Sep 11, 2025
fb7aa67
devops: try rust actions runner
taronaeo Sep 11, 2025
112be25
Revert "devops: try rust actions runner"
taronaeo Sep 11, 2025
bedce8d
devops: try a different path for rust
taronaeo Sep 11, 2025
e0a82b7
devops: dump home directory and user info
taronaeo Sep 11, 2025
c960ab4
devops: install gguf-py only
taronaeo Sep 11, 2025
fd738d4
devops: missed relative path
taronaeo Sep 11, 2025
7602eee
devops: remove big-endian files since local swapping is working
taronaeo Sep 11, 2025
1da6186
devops: revert test-tokenizer-0 cmakelists
taronaeo Sep 11, 2025
841ac94
Fix unicode flags conversion from and to uint16_t
AlekseiNikiforovIBM Jan 10, 2025
1911ac2
Simplify byteswap command
AlekseiNikiforovIBM Sep 15, 2025
ec2ec37
Add byteswapping and git-lfs for test-tokenizers-ggml-vocabs
AlekseiNikiforovIBM Sep 15, 2025
3b88055
Fix endianness detection in vocab loader
AlekseiNikiforovIBM Sep 15, 2025
1d13fde
Disable test-thread-safety on s390x
AlekseiNikiforovIBM Sep 16, 2025
9662210
Fix q8_0 test in test-quantize-fns
AlekseiNikiforovIBM Sep 16, 2025
e3e5891
devops: add big-endian stories260K
taronaeo Sep 22, 2025
63fee7f
devops: add s390x test-eval-callback
taronaeo Sep 22, 2025
c5ea535
devops: fix test does not exist
taronaeo Sep 22, 2025
d29864c
devops: fix model not found llama-eval-callback
taronaeo Sep 22, 2025
2a3fa9f
Fix q3_K dot product error in test-quantize-fns on s390x
AlekseiNikiforovIBM Sep 22, 2025
d4e2650
devops: re-enable ppc64le for testing
taronaeo Sep 23, 2025
82074b5
devops: activate test-thread-safety for s390x
taronaeo Sep 23, 2025
5310bbb
devops: disable ppc64le tests
taronaeo Sep 23, 2025
41fb59a
devops: LLAMA_FATAL_WARNINGS=ON
taronaeo Sep 23, 2025
5b79489
Correct repository URL for s390x for test-thread-safety model
AlekseiNikiforovIBM Sep 24, 2025
6d8d61a
Fix fs_get_cache_directory
AlekseiNikiforovIBM Sep 25, 2025
4aa2c9e
Re-enable CI for ppc64le
AlekseiNikiforovIBM Sep 25, 2025
461add2
Fortify ggml_rope_impl
AlekseiNikiforovIBM Sep 25, 2025
6dbeda3
Add TODO in struct unicode_cpt_flags to reimplement it in endian-inde…
AlekseiNikiforovIBM Sep 26, 2025
e33d9ee
Update URL for big-endian model
AlekseiNikiforovIBM Sep 26, 2025
f3ada9e
Update .github/workflows/build.yml
AlekseiNikiforovIBM Sep 26, 2025
8ab0491
Update remaining mentions of BE models to ggml-org/models repo
AlekseiNikiforovIBM Sep 26, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
91 changes: 0 additions & 91 deletions .github/workflows/build-linux-cross.yml
Original file line number Diff line number Diff line change
Expand Up @@ -141,97 +141,6 @@ jobs:

# cmake --build build --config Release -j $(nproc)

ubuntu-24-ppc64el-cpu-cross:
runs-on: ubuntu-24.04

steps:
- uses: actions/checkout@v4
- name: Setup PowerPC64le
run: |
sudo dpkg --add-architecture ppc64el

# Add arch-specific repositories for non-amd64 architectures
cat << EOF | sudo tee /etc/apt/sources.list.d/ppc64el-ports.list
deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble main universe
deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble-updates main universe
deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble-security main universe
deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble-backports main universe
EOF

sudo apt-get update || true ;# Prevent failure due to missing URLs.

sudo apt-get install -y --no-install-recommends \
build-essential \
gcc-14-powerpc64le-linux-gnu \
g++-14-powerpc64le-linux-gnu

- name: Build
run: |
cmake -B build -DLLAMA_CURL=OFF \
-DCMAKE_BUILD_TYPE=Release \
-DGGML_OPENMP=OFF \
-DLLAMA_BUILD_EXAMPLES=ON \
-DLLAMA_BUILD_TOOLS=ON \
-DLLAMA_BUILD_TESTS=OFF \
-DCMAKE_SYSTEM_NAME=Linux \
-DCMAKE_SYSTEM_PROCESSOR=ppc64 \
-DCMAKE_C_COMPILER=powerpc64le-linux-gnu-gcc-14 \
-DCMAKE_CXX_COMPILER=powerpc64le-linux-gnu-g++-14 \
-DCMAKE_POSITION_INDEPENDENT_CODE=ON \
-DCMAKE_FIND_ROOT_PATH=/usr/lib/powerpc64le-linux-gnu \
-DCMAKE_FIND_ROOT_PATH_MODE_PROGRAM=NEVER \
-DCMAKE_FIND_ROOT_PATH_MODE_LIBRARY=ONLY \
-DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH

cmake --build build --config Release -j $(nproc)

# ubuntu-24-ppc64el-vulkan-cross:
# runs-on: ubuntu-24.04

# steps:
# - uses: actions/checkout@v4
# - name: Setup PowerPC64le
# run: |
# sudo dpkg --add-architecture ppc64el

# # Add arch-specific repositories for non-amd64 architectures
# cat << EOF | sudo tee /etc/apt/sources.list.d/ppc64el-ports.list
# deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble main universe
# deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble-updates main universe
# deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble-security main universe
# deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble-backports main universe
# EOF

# sudo apt-get update || true ;# Prevent failure due to missing URLs.

# sudo apt-get install -y --no-install-recommends \
# build-essential \
# glslc \
# gcc-14-powerpc64le-linux-gnu \
# g++-14-powerpc64le-linux-gnu \
# libvulkan-dev:ppc64el

# - name: Build
# run: |
# cmake -B build -DLLAMA_CURL=OFF \
# -DCMAKE_BUILD_TYPE=Release \
# -DGGML_VULKAN=ON \
# -DGGML_OPENMP=OFF \
# -DLLAMA_BUILD_EXAMPLES=ON \
# -DLLAMA_BUILD_TOOLS=ON \
# -DLLAMA_BUILD_TESTS=OFF \
# -DCMAKE_SYSTEM_NAME=Linux \
# -DCMAKE_SYSTEM_PROCESSOR=ppc64 \
# -DCMAKE_C_COMPILER=powerpc64le-linux-gnu-gcc-14 \
# -DCMAKE_CXX_COMPILER=powerpc64le-linux-gnu-g++-14 \
# -DCMAKE_POSITION_INDEPENDENT_CODE=ON \
# -DCMAKE_FIND_ROOT_PATH=/usr/lib/powerpc64le-linux-gnu \
# -DCMAKE_FIND_ROOT_PATH_MODE_PROGRAM=NEVER \
# -DCMAKE_FIND_ROOT_PATH_MODE_LIBRARY=ONLY \
# -DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH

# cmake --build build --config Release -j $(nproc)

debian-13-loongarch64-cpu-cross:
runs-on: ubuntu-24.04
container: debian@sha256:653dfb9f86c3782e8369d5f7d29bb8faba1f4bff9025db46e807fa4c22903671
Expand Down
37 changes: 34 additions & 3 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,10 @@ jobs:
os: ubuntu-22.04
- build: 'arm64'
os: ubuntu-22.04-arm
- build: 's390x'
os: ubuntu-24.04-s390x
- build: 'ppc64le'
os: ubuntu-24.04-ppc64le

runs-on: ${{ matrix.os }}

Expand All @@ -206,11 +210,28 @@ jobs:
key: ubuntu-cpu-cmake
evict-old-files: 1d

- name: Dependencies
id: depends
- name: Build Dependencies
id: build_depends
run: |
sudo apt-get update
sudo apt-get install build-essential libcurl4-openssl-dev
sudo apt-get install -y --no-install-recommends \
python3 python3-pip python3-dev \
libjpeg-dev build-essential libcurl4-openssl-dev \
git-lfs

- name: Python Dependencies
id: python_depends
run: |
python3 -m pip install --upgrade pip
pip3 install ./gguf-py

# Runs only for the s390x matrix entry: converts every models/*.gguf file to
# big-endian byte order with the gguf-py conversion script.
- name: Swap Endianness
  id: endianness
  if: ${{ matrix.build == 's390x' }}
  run: |
    for f in models/*.gguf; do
      # An unmatched glob is left as the literal pattern; skip it instead of
      # handing a non-existent path to the converter.
      [ -e "$f" ] || continue
      # YES is fed on stdin (presumably the script's confirmation prompt).
      # "$f" is quoted so paths containing spaces stay a single argument.
      echo YES | python3 gguf-py/gguf/scripts/gguf_convert_endian.py "$f" big
    done

- name: Build
id: cmake_build
Expand All @@ -228,6 +249,7 @@ jobs:

- name: Test llama2c conversion
id: llama2c_test
if: ${{ matrix.build != 's390x' }}
run: |
cd build
echo "Fetch tokenizer"
Expand All @@ -237,6 +259,15 @@ jobs:
./bin/llama-convert-llama2c-to-ggml --copy-vocab-from-model ./tok512.bin --llama2c-model stories260K.bin --llama2c-output-model stories260K.gguf
./bin/llama-cli -m stories260K.gguf -p "One day, Lily met a Shoggoth" -n 500 -c 256

- name: Test llama2c (s390x)
id: llama2c_test_s390x
if: ${{ matrix.build == 's390x' }}
run: |
cd build
echo "Fetch llama2c big-endian model"
wget https://huggingface.co/ggml-org/models/resolve/main/tinyllamas/stories260K-be.gguf
./bin/llama-cli -m stories260K-be.gguf -p "One day, Lily met a Shoggoth" -n 500 -c 256

ubuntu-latest-cmake-sanitizer:
runs-on: ubuntu-latest

Expand Down
19 changes: 18 additions & 1 deletion common/common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,11 @@
#include <unistd.h>
#endif

#if defined(__linux__)
#include <sys/types.h>
#include <pwd.h>
#endif

#if defined(_MSC_VER)
#pragma warning(disable: 4244 4267) // possible loss of data
#endif
Expand Down Expand Up @@ -865,8 +870,20 @@ std::string fs_get_cache_directory() {
#if defined(__linux__) || defined(__FreeBSD__) || defined(_AIX) || defined(__OpenBSD__)
if (std::getenv("XDG_CACHE_HOME")) {
cache_directory = std::getenv("XDG_CACHE_HOME");
} else {
} else if (std::getenv("HOME")) {
cache_directory = std::getenv("HOME") + std::string("/.cache/");
} else {
#if defined(__linux__)
/* no $HOME is defined, fall back to getpwuid */
struct passwd *pw = getpwuid(getuid());
if ((!pw) || (!pw->pw_dir)) {
throw std::runtime_error("Failed to find $HOME directory");
}

cache_directory = std::string(pw->pw_dir) + std::string("/.cache/");
#else /* defined(__linux__) */
throw std::runtime_error("Failed to find $HOME directory");
#endif /* defined(__linux__) */
}
#elif defined(__APPLE__)
cache_directory = std::getenv("HOME") + std::string("/Library/Caches/");
Expand Down
9 changes: 7 additions & 2 deletions examples/eval-callback/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,11 @@ target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
target_compile_features(${TARGET} PRIVATE cxx_std_17)

set(TEST_TARGET test-eval-callback)
add_test(NAME ${TEST_TARGET}
COMMAND llama-eval-callback --hf-repo ggml-org/models --hf-file tinyllamas/stories260K.gguf --model stories260K.gguf --prompt hello --seed 42 -ngl 0)
# Register the eval-callback test. On s390x the big-endian ("-be") build of
# the stories260K model is used; every other processor uses the regular file.
# The variable is expanded inside quotes so that an empty
# CMAKE_SYSTEM_PROCESSOR still yields a well-formed if() condition.
if("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "s390x")
    set(LLAMA_EVAL_CALLBACK_TEST_MODEL stories260K-be.gguf)
else()
    set(LLAMA_EVAL_CALLBACK_TEST_MODEL stories260K.gguf)
endif()
add_test(NAME ${TEST_TARGET}
         COMMAND llama-eval-callback --hf-repo ggml-org/models --hf-file tinyllamas/${LLAMA_EVAL_CALLBACK_TEST_MODEL} --model ${LLAMA_EVAL_CALLBACK_TEST_MODEL} --prompt hello --seed 42 -ngl 0)
set_property(TEST ${TEST_TARGET} PROPERTY LABELS eval-callback curl)
8 changes: 5 additions & 3 deletions ggml/src/ggml-cpu/arch/s390/quants.c
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,8 @@ void quantize_row_q8_0(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, i

for (int j = 0; j < 8; j++) {
const float32x4_t v = vec_mul(srcv[j], vec_splats(id));
const int32x4_t vi = vec_signed(v);
/* Uses non-default rounding for vec_signed or vec_round */
const int32x4_t vi = vec_signed(__builtin_s390_vfisb(v, 4, 1));

y[i].qs[4*j + 0] = vec_extract(vi, 0);
y[i].qs[4*j + 1] = vec_extract(vi, 1);
Expand Down Expand Up @@ -122,7 +123,8 @@ void quantize_row_q8_1(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, i

for (int j = 0; j < 8; j++) {
const float32x4_t v = vec_mul(srcv[j], vec_splats(id));
const int32x4_t vi = vec_signed(v);
/* Uses non-default rounding for vec_signed or vec_round */
const int32x4_t vi = vec_signed(__builtin_s390_vfisb(v, 4, 1));

y[i].qs[4*j + 0] = vec_extract(vi, 0);
y[i].qs[4*j + 1] = vec_extract(vi, 1);
Expand Down Expand Up @@ -731,7 +733,7 @@ void ggml_vec_dot_q3_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi
uint8x16_t q3h[4];
uint8x16_t q3b[2];
int8x16_t q3bytes[4];
int8x16_t q8bytes[4];
int8x16_t q8bytes[8];
uint8x16_t qhbits[2];

float sum = 0;
Expand Down
2 changes: 1 addition & 1 deletion ggml/src/ggml.c
Original file line number Diff line number Diff line change
Expand Up @@ -3927,7 +3927,7 @@ static struct ggml_tensor * ggml_rope_impl(
memcpy(params + 8, &attn_factor, sizeof(float));
memcpy(params + 9, &beta_fast, sizeof(float));
memcpy(params + 10, &beta_slow, sizeof(float));
if (mrope_used) {
if (mrope_used && sections) {
memcpy(params + 11, sections, sizeof(int32_t) * GGML_MROPE_SECTIONS);
} else {
memset(params + 11, 0, sizeof(int32_t) * GGML_MROPE_SECTIONS);
Expand Down
2 changes: 1 addition & 1 deletion src/llama-vocab.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1772,7 +1772,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
const size_t n_precompiled_charsmap = gguf_get_arr_n(ctx, precompiled_charsmap_keyidx);
const char * pc = (const char *) gguf_get_arr_data(ctx, precompiled_charsmap_keyidx);
precompiled_charsmap.assign(pc, pc + n_precompiled_charsmap);
#ifdef IS_BIG_ENDIAN
#if defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
// correct endianness of data in precompiled_charsmap binary blob
uint32_t * xcda_blob_size = (uint32_t *) &precompiled_charsmap[0];
*xcda_blob_size = __builtin_bswap32(*xcda_blob_size);
Expand Down
43 changes: 43 additions & 0 deletions src/unicode.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#include <string>
#include <vector>

// TODO: reimplement this structure in an endian-independent way
struct unicode_cpt_flags {
enum {
UNDEFINED = 0x0001,
Expand All @@ -15,6 +16,10 @@ struct unicode_cpt_flags {
SYMBOL = 0x0040, // regex: \p{S}
CONTROL = 0x0080, // regex: \p{C}
MASK_CATEGORIES = 0x00FF,
WHITESPACE = 0x0100,
LOWERCASE = 0x0200,
UPPERCASE = 0x0400,
NFD = 0x0800,
};

// codepoint type
Expand All @@ -34,11 +39,49 @@ struct unicode_cpt_flags {

// decode from uint16
// Builds the flag set from a packed 16-bit value whose bits are the enum
// masks of this struct (UNDEFINED, NUMBER, ..., NFD). Defaults to 0.
inline unicode_cpt_flags(const uint16_t flags = 0) {
// NOTE: an identifier that is not defined evaluates to 0 inside #if, so a
// compiler that provides neither macro compares 0 == 0 and takes this branch;
// the #error below is then never reached.
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
// Store the packed value directly over the start of the object.
// NOTE(review): presumably relies on the is_* members being 1-bit bit-fields
// laid out in the same order as the enum masks; their declarations are
// outside this view -- confirm.
*reinterpret_cast<uint16_t*>(this) = flags;
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
// Set every member individually from its mask instead of copying the raw
// 16-bit pattern.
is_undefined = (flags & UNDEFINED) ? 1 : 0;
is_number = (flags & NUMBER) ? 1 : 0;
is_letter = (flags & LETTER) ? 1 : 0;
is_separator = (flags & SEPARATOR) ? 1 : 0;
is_accent_mark = (flags & ACCENT_MARK) ? 1 : 0;
is_punctuation = (flags & PUNCTUATION) ? 1 : 0;
is_symbol = (flags & SYMBOL) ? 1 : 0;
is_control = (flags & CONTROL) ? 1 : 0;
is_whitespace = (flags & WHITESPACE) ? 1 : 0;
is_lowercase = (flags & LOWERCASE) ? 1 : 0;
is_uppercase = (flags & UPPERCASE) ? 1 : 0;
is_nfd = (flags & NFD) ? 1 : 0;
#else
#error Unexpected or undefined __BYTE_ORDER__
#endif
}

// encode to uint16
// Returns the flags as a packed 16-bit value built from the same enum masks
// that the uint16_t constructor decodes.
inline uint16_t as_uint() const {
// NOTE: an identifier that is not defined evaluates to 0 inside #if, so a
// compiler that provides neither macro compares 0 == 0 and takes this branch.
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
// Read the first 16 bits of the object back as the packed value.
return *reinterpret_cast<const uint16_t*>(this);
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
// Each member is multiplied by its mask and the products are summed.
// NOTE(review): this equals a bitwise OR of the masks only if every is_*
// member holds 0 or 1 -- presumably they are 1-bit bit-fields; confirm
// against the declarations, which are outside this view.
uint16_t result =
is_undefined * UNDEFINED
+ is_number * NUMBER
+ is_letter * LETTER
+ is_separator * SEPARATOR
+ is_accent_mark * ACCENT_MARK
+ is_punctuation * PUNCTUATION
+ is_symbol * SYMBOL
+ is_control * CONTROL
+ is_whitespace * WHITESPACE
+ is_lowercase * LOWERCASE
+ is_uppercase * UPPERCASE
+ is_nfd * NFD
;

return result;
#else
#error Unexpected or undefined __BYTE_ORDER__
#endif
}

inline uint16_t category_flag() const {
Expand Down
6 changes: 5 additions & 1 deletion tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,11 @@ llama_build_and_test(test-json-partial.cpp)
llama_build_and_test(test-log.cpp)
llama_build_and_test(test-regex-partial.cpp)

llama_build_and_test(test-thread-safety.cpp ARGS -hf ggml-org/models -hff tinyllamas/stories15M-q4_0.gguf -ngl 99 -p "The meaning of life is" -n 128 -c 256 -ub 32 -np 4 -t 2)
# Build and register test-thread-safety. On s390x the big-endian ("-be")
# model file is requested; every other processor uses the regular q4_0 file.
# The variable is expanded inside quotes so that an empty
# CMAKE_SYSTEM_PROCESSOR still yields a well-formed if() condition.
if ("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "s390x")
    set(LLAMA_TEST_THREAD_SAFETY_MODEL tinyllamas/stories15M-be.Q4_0.gguf)
else()
    set(LLAMA_TEST_THREAD_SAFETY_MODEL tinyllamas/stories15M-q4_0.gguf)
endif()
llama_build_and_test(test-thread-safety.cpp ARGS -hf ggml-org/models -hff ${LLAMA_TEST_THREAD_SAFETY_MODEL} -ngl 99 -p "The meaning of life is" -n 128 -c 256 -ub 32 -np 4 -t 2)

# this fails on windows (github hosted runner) due to curl DLL not found (exit code 0xc0000135)
if (NOT WIN32)
Expand Down
7 changes: 7 additions & 0 deletions tests/test-tokenizers-repo.sh
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,13 @@ if [ -d $folder ] && [ -d $folder/.git ]; then
(cd $folder; git pull)
else
git clone $repo $folder

# byteswap models if on big endian
# (s390x is the only machine type checked here)
if [ "$(uname -m)" = s390x ]; then
    # Expansions are quoted so paths containing spaces stay intact.
    for f in "$folder"/*/*.gguf; do
        # An unmatched glob is left as the literal pattern; skip it.
        [ -e "$f" ] || continue
        # YES is fed on stdin (presumably the script's confirmation prompt).
        echo YES | python3 "$(dirname "$0")/../gguf-py/gguf/scripts/gguf_convert_endian.py" "$f" big
    done
fi
fi

shopt -s globstar
Expand Down
Loading