Skip to content

Commit cdc0ffa

Browse files
authored
Merge branch 'ggml-org:master' into master
2 parents 67cf20b + 4710dd3 commit cdc0ffa

File tree

4 files changed

+132
-15
lines changed

4 files changed

+132
-15
lines changed

.github/workflows/docker.yml

Lines changed: 36 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -68,40 +68,36 @@ jobs:
6868
username: ${{ github.repository_owner }}
6969
password: ${{ secrets.GITHUB_TOKEN }}
7070

71-
- name: Determine tag name
71+
# Step `srctag`: runs the repository-local `get-tag-name` action. Later steps
# read its result as `steps.srctag.outputs.name`.
# BRANCH_NAME is set to `github.head_ref`, falling back to `github.ref_name`
# when `head_ref` is empty.
- name: Determine source tag name
72+
id: srctag
73+
uses: ./.github/actions/get-tag-name
74+
env:
75+
BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
76+
77+
# Step `tag`: builds the comma-separated ghcr.io tag lists for the full, light
# and server images and writes them to $GITHUB_OUTPUT as `full_output_tags`,
# `light_output_tags` and `server_output_tags` (also echoed for debugging).
# Each list holds an unversioned tag plus a versioned one; the "cpu" matrix
# entry gets no type suffix, every other entry gets "-<matrix.config.tag>".
# Reading this diff: gutter entries ending in "-" are lines removed by the
# commit and entries ending in "+" are lines added. The locally computed
# TAG_POSTFIX (build number / branch + short hash) is removed, and the version
# suffix is now taken from `steps.srctag.outputs.name`.
- name: Determine image tag name
7278
id: tag
7379
shell: bash
7480
run: |
75-
BUILD_NUMBER="$(git rev-list --count HEAD)"
76-
SHORT_HASH="$(git rev-parse --short=7 HEAD)"
7781
REPO_OWNER="${GITHUB_REPOSITORY_OWNER@L}" # to lower case
7882
REPO_NAME="${{ github.event.repository.name }}"
7983

80-
# determine tag name postfix (build number, commit hash)
81-
if [[ "${{ env.GITHUB_BRANCH_NAME }}" == "master" ]]; then
82-
TAG_POSTFIX="-b${BUILD_NUMBER}"
83-
else
84-
SAFE_NAME=$(echo "${{ env.GITHUB_BRANCH_NAME }}" | tr '/' '-')
85-
TAG_POSTFIX="-${SAFE_NAME}-${SHORT_HASH}"
86-
fi
8784
# list all tags possible
8885
if [[ "${{ matrix.config.tag }}" == "cpu" ]]; then
8986
TYPE=""
9087
else
9188
TYPE="-${{ matrix.config.tag }}"
9289
fi
9390
PREFIX="ghcr.io/${REPO_OWNER}/${REPO_NAME}:"
94-
FULLTAGS="${PREFIX}full${TYPE},${PREFIX}full${TYPE}${TAG_POSTFIX}"
95-
LIGHTTAGS="${PREFIX}light${TYPE},${PREFIX}light${TYPE}${TAG_POSTFIX}"
96-
SERVERTAGS="${PREFIX}server${TYPE},${PREFIX}server${TYPE}${TAG_POSTFIX}"
91+
FULLTAGS="${PREFIX}full${TYPE},${PREFIX}full${TYPE}-${{ steps.srctag.outputs.name }}"
92+
LIGHTTAGS="${PREFIX}light${TYPE},${PREFIX}light${TYPE}-${{ steps.srctag.outputs.name }}"
93+
SERVERTAGS="${PREFIX}server${TYPE},${PREFIX}server${TYPE}-${{ steps.srctag.outputs.name }}"
9794
echo "full_output_tags=$FULLTAGS" >> $GITHUB_OUTPUT
9895
echo "light_output_tags=$LIGHTTAGS" >> $GITHUB_OUTPUT
9996
echo "server_output_tags=$SERVERTAGS" >> $GITHUB_OUTPUT
10097
echo "full_output_tags=$FULLTAGS" # print out for debugging
10198
echo "light_output_tags=$LIGHTTAGS" # print out for debugging
10299
echo "server_output_tags=$SERVERTAGS" # print out for debugging
103100
env:
104-
GITHUB_BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
105101
GITHUB_REPOSITORY_OWNER: '${{ github.repository_owner }}'
106102

107103
- name: Free Disk Space (Ubuntu)
@@ -177,3 +173,29 @@ jobs:
177173
# return to this if the experimental github cache is having issues
178174
#cache-to: type=local,dest=/tmp/.buildx-cache
179175
#cache-from: type=local,src=/tmp/.buildx-cache
176+
177+
# Job `create_tag` (added by this commit): checks out the repository, resolves
# the tag name with the local `get-tag-name` action, then creates that git tag
# and pushes it to `origin`.
create_tag:
178+
name: Create and push git tag
179+
runs-on: ubuntu-22.04
180+
permissions:
181+
# write access to repository contents; presumably required for the tag push below
contents: write
182+
183+
steps:
184+
- name: Clone
185+
id: checkout
186+
uses: actions/checkout@v4
187+
with:
188+
# 0 = fetch full history; presumably needed by get-tag-name to count commits - TODO confirm
fetch-depth: 0
189+
190+
# Same tag-name lookup as in the image build job; the result is read below
# as `steps.srctag.outputs.name`.
- name: Determine source tag name
191+
id: srctag
192+
uses: ./.github/actions/get-tag-name
193+
env:
194+
BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
195+
196+
# Both commands end in `|| exit 0`: if `git tag` fails the step exits
# successfully without pushing, and a failed push is also reported as success.
# NOTE(review): `${{ steps.srctag.outputs.name }}` is substituted into the
# script text unquoted before the shell runs. If that value can derive from an
# untrusted branch name, pass it through `env:` and quote it instead - confirm
# against the workflow triggers and the get-tag-name action.
- name: Create and push git tag
197+
env:
198+
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
199+
run: |
200+
git tag ${{ steps.srctag.outputs.name }} || exit 0
201+
git push origin ${{ steps.srctag.outputs.name }} || exit 0

build-xcframework.sh

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -422,6 +422,7 @@ echo "Building for iOS devices..."
422422
cmake -B build-ios-device -G Xcode \
423423
"${COMMON_CMAKE_ARGS[@]}" \
424424
-DCMAKE_OSX_DEPLOYMENT_TARGET=${IOS_MIN_OS_VERSION} \
425+
-DCMAKE_SYSTEM_NAME=iOS \
425426
-DCMAKE_OSX_SYSROOT=iphoneos \
426427
-DCMAKE_OSX_ARCHITECTURES="arm64" \
427428
-DCMAKE_XCODE_ATTRIBUTE_SUPPORTED_PLATFORMS=iphoneos \

ggml/src/ggml-cpu/arch-fallback.h

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -160,7 +160,6 @@
160160
#define ggml_vec_dot_iq3_s_q8_K_generic ggml_vec_dot_iq3_s_q8_K
161161
#define ggml_vec_dot_iq1_s_q8_K_generic ggml_vec_dot_iq1_s_q8_K
162162
#define ggml_vec_dot_iq1_m_q8_K_generic ggml_vec_dot_iq1_m_q8_K
163-
#define ggml_vec_dot_mxfp4_q8_0_generic ggml_vec_dot_mxfp4_q8_0
164163
// repack.cpp
165164
#define ggml_quantize_mat_q8_0_4x4_generic ggml_quantize_mat_q8_0_4x4
166165
#define ggml_quantize_mat_q8_0_4x8_generic ggml_quantize_mat_q8_0_4x8

ggml/src/ggml-cpu/arch/s390/quants.c

Lines changed: 95 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -260,6 +260,101 @@ void ggml_vec_dot_q4_1_q8_1(int n, float * GGML_RESTRICT s, size_t bs, const voi
260260
#endif
261261
}
262262

263+
// s390x implementation of the MXFP4 x Q8_0 dot product.
//
// Treats vx as an array of block_mxfp4 and vy as an array of block_q8_0,
// processes n / QK_MXFP4 blocks, and writes the scalar result to *s.
//
// Preconditions (asserted): nrc == 1 and n is a multiple of QK_MXFP4.
// QK_MXFP4 must equal QK8_0 (static_assert).
// bs, bx, by and nrc are not used by the vector path; they are only forwarded
// to ggml_vec_dot_mxfp4_q8_0_generic when the vector path is not compiled in.
void ggml_vec_dot_mxfp4_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
264+
assert(nrc == 1);
265+
UNUSED(nrc);
266+
UNUSED(bx);
267+
UNUSED(by);
268+
UNUSED(bs);
269+
assert(n % QK_MXFP4 == 0);
270+
static_assert(QK_MXFP4 == QK8_0, "QK_MXFP4 and QK8_0 must be the same");
271+
272+
const int qk = QK_MXFP4;
273+
// number of quantized blocks in each row
const int nb = n / qk;
274+
275+
const block_mxfp4 * GGML_RESTRICT x = vx;
276+
const block_q8_0 * GGML_RESTRICT y = vy;
277+
278+
int ib = 0;
279+
float sumf = 0.0f;
280+
281+
// Vector path: compiled only when __VXE__ or __VXE2__ is defined.
#if defined(__VXE__) || defined(__VXE2__)
282+
// 16 int8 values loaded from kvalues_mxfp4; used below as the vec_perm lookup table
const int8x16_t v_k = vec_xl(0, kvalues_mxfp4);
283+
// mask that keeps the low 4 bits of every byte
const uint8x16_t v_m = vec_splats((const uint8_t)0x0F);
284+
285+
// per-lane float accumulator, reduced to a scalar after both loops
float32x4_t v_acc = vec_splats(0.0f);
286+
287+
#pragma GCC unroll 8
288+
// main loop: consumes two blocks (ib, ib + 1) per iteration
for (; ib + 1 < nb; ib += 2) {
289+
const block_mxfp4 * GGML_RESTRICT x0 = &x[ib + 0];
290+
const block_mxfp4 * GGML_RESTRICT x1 = &x[ib + 1];
291+
const block_q8_0 * GGML_RESTRICT y0 = &y[ib + 0];
292+
const block_q8_0 * GGML_RESTRICT y1 = &y[ib + 1];
293+
294+
// 16 packed bytes per MXFP4 block, two 4-bit indices in each byte
const uint8x16_t v_x0 = vec_xl(0, x0->qs);
295+
const uint8x16_t v_x1 = vec_xl(0, x1->qs);
296+
297+
// split each byte into its low nibble (mask) and high nibble (shift right by 4)
int8x16_t v_x0l = (int8x16_t)vec_and(v_x0, v_m);
298+
int8x16_t v_x0h = (int8x16_t)vec_sr(v_x0, 4);
299+
int8x16_t v_x1l = (int8x16_t)vec_and(v_x1, v_m);
300+
int8x16_t v_x1h = (int8x16_t)vec_sr(v_x1, 4);
301+
302+
// table lookup: each nibble (0..15) selects a byte of v_k; both vec_perm
// sources are v_k, so the result is v_k[index] for every lane
v_x0l = vec_perm(v_k, v_k, (uchar8x16_t)v_x0l);
303+
v_x0h = vec_perm(v_k, v_k, (uchar8x16_t)v_x0h);
304+
v_x1l = vec_perm(v_k, v_k, (uchar8x16_t)v_x1l);
305+
v_x1h = vec_perm(v_k, v_k, (uchar8x16_t)v_x1h);
306+
307+
// Q8_0 values: first 16 bytes of qs and the 16 bytes at byte offset QK8_0/2
const int8x16_t v_y0l = vec_xl(0, y0->qs);
308+
const int8x16_t v_y0h = vec_xl(QK8_0/2, y0->qs);
309+
const int8x16_t v_y1l = vec_xl(0, y1->qs);
310+
const int8x16_t v_y1h = vec_xl(QK8_0/2, y1->qs);
311+
312+
// low-nibble values pair with the first half of y, high-nibble values with the
// second half. ggml_vec_dot is defined elsewhere; presumably it adds the int8
// products into the int32 accumulator passed as first argument - TODO confirm
const int32x4_t v_xy0 = ggml_vec_dot(ggml_vec_dot(vec_splats(0), v_x0l, v_y0l), v_x0h, v_y0h);
313+
const int32x4_t v_xy1 = ggml_vec_dot(ggml_vec_dot(vec_splats(0), v_x1l, v_y1l), v_x1h, v_y1h);
314+
315+
const float32x4_t v_xy0f = vec_float(v_xy0);
316+
const float32x4_t v_xy1f = vec_float(v_xy1);
317+
318+
// combined per-block scale: converted x->e times converted y->d, broadcast to all lanes
const float32x4_t v_d0 = vec_splats(GGML_E8M0_TO_FP32_HALF(x0->e) * GGML_CPU_FP16_TO_FP32(y0->d));
319+
const float32x4_t v_d1 = vec_splats(GGML_E8M0_TO_FP32_HALF(x1->e) * GGML_CPU_FP16_TO_FP32(y1->d));
320+
321+
// v_acc += v_xy*f * v_d*
v_acc = vec_madd(v_xy0f, v_d0, v_acc);
322+
v_acc = vec_madd(v_xy1f, v_d1, v_acc);
323+
}
324+
325+
// tail loop: handles the remaining block when nb is odd, one block at a time,
// using the same steps as the paired loop above
for (; ib < nb; ++ib) {
326+
const block_mxfp4 * GGML_RESTRICT x0 = &x[ib + 0];
327+
const block_q8_0 * GGML_RESTRICT y0 = &y[ib + 0];
328+
329+
const uint8x16_t v_x = vec_xl(0, x0->qs);
330+
331+
int8x16_t v_xl = (int8x16_t)vec_and(v_x, v_m);
332+
int8x16_t v_xh = (int8x16_t)vec_sr(v_x, 4);
333+
334+
v_xl = vec_perm(v_k, v_k, (uchar8x16_t)v_xl);
335+
v_xh = vec_perm(v_k, v_k, (uchar8x16_t)v_xh);
336+
337+
const int8x16_t v_yl = vec_xl(0, y0->qs);
338+
const int8x16_t v_yh = vec_xl(QK8_0/2, y0->qs);
339+
340+
const int32x4_t v_xy = ggml_vec_dot(ggml_vec_dot(vec_splats(0), v_xl, v_yl), v_xh, v_yh);
341+
const float32x4_t v_xyf = vec_float(v_xy);
342+
343+
const float32x4_t v_d = vec_splats(GGML_E8M0_TO_FP32_HALF(x0->e) * GGML_CPU_FP16_TO_FP32(y0->d));
344+
v_acc = vec_madd(v_xyf, v_d, v_acc);
345+
}
346+
347+
// vec_hsum_f32x4 is defined elsewhere; presumably the horizontal sum of the four lanes
sumf = vec_hsum_f32x4(v_acc);
348+
*s = sumf;
349+
#else
350+
// no vector support: mark the locals as unused and delegate to the generic implementation
UNUSED(x);
351+
UNUSED(y);
352+
UNUSED(ib);
353+
UNUSED(sumf);
354+
ggml_vec_dot_mxfp4_q8_0_generic(n, s, bs, vx, bx, vy, by, nrc);
355+
#endif
356+
}
357+
263358
void ggml_vec_dot_q5_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
264359
const int qk = QK8_0;
265360
const int nb = n / qk;

0 commit comments

Comments
 (0)