
Commit 0aef94b

Merge branch 'master' of github.com:zRzRzRzRzRzRzR/llama.cpp
Parents: 2190494 + f675efd

File tree: 12 files changed, +189 −14 lines

.devops/cuda.Dockerfile

Lines changed: 1 addition & 1 deletion

@@ -21,7 +21,7 @@ COPY . .
 RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \
     export CMAKE_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH}"; \
     fi && \
-    cmake -B build -DGGML_NATIVE=OFF -DGGML_CUDA=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
+    cmake -B build -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DLLAMA_CURL=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
     cmake --build build --config Release -j$(nproc)

 RUN mkdir -p /app/lib && \

.devops/intel.Dockerfile

Lines changed: 1 addition & 1 deletion

@@ -17,7 +17,7 @@ RUN if [ "${GGML_SYCL_F16}" = "ON" ]; then \
     && export OPT_SYCL_F16="-DGGML_SYCL_F16=ON"; \
     fi && \
     echo "Building with dynamic libs" && \
-    cmake -B build -DGGML_NATIVE=OFF -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ${OPT_SYCL_F16} && \
+    cmake -B build -DGGML_NATIVE=OFF -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_CURL=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON ${OPT_SYCL_F16} && \
     cmake --build build --config Release -j$(nproc)

 RUN mkdir -p /app/lib && \

.devops/musa.Dockerfile

Lines changed: 1 addition & 1 deletion

@@ -35,7 +35,7 @@ COPY . .
 RUN if [ "${MUSA_DOCKER_ARCH}" != "default" ]; then \
     export CMAKE_ARGS="-DMUSA_ARCHITECTURES=${MUSA_DOCKER_ARCH}"; \
     fi && \
-    cmake -B build -DGGML_NATIVE=OFF -DGGML_MUSA=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
+    cmake -B build -DGGML_NATIVE=OFF -DGGML_MUSA=ON -DLLAMA_CURL=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
     cmake --build build --config Release -j$(nproc)

 RUN mkdir -p /app/lib && \

.devops/rocm.Dockerfile

Lines changed: 3 additions & 3 deletions

@@ -17,8 +17,8 @@ FROM ${BASE_ROCM_DEV_CONTAINER} AS build
 # gfx906 is deprecated
 #check https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.2.4/reference/system-requirements.html

-#ARG ROCM_DOCKER_ARCH='gfx803,gfx900,gfx906,gfx908,gfx90a,gfx942,gfx1010,gfx1030,gfx1032,gfx1100,gfx1101,gfx1102'
-ARG ROCM_DOCKER_ARCH=gfx1100
+ARG ROCM_DOCKER_ARCH='gfx803,gfx900,gfx906,gfx908,gfx90a,gfx942,gfx1010,gfx1030,gfx1032,gfx1100,gfx1101,gfx1102'
+#ARG ROCM_DOCKER_ARCH=gfx1100

 # Set nvcc architectured
 ENV AMDGPU_TARGETS=${ROCM_DOCKER_ARCH}
@@ -40,7 +40,7 @@ WORKDIR /app
 COPY . .

 RUN HIPCXX="$(hipconfig -l)/clang" HIP_PATH="$(hipconfig -R)" \
-    cmake -S . -B build -DGGML_HIP=ON -DAMDGPU_TARGETS=$ROCM_DOCKER_ARCH -DCMAKE_BUILD_TYPE=Release \
+    cmake -S . -B build -DGGML_HIP=ON -DAMDGPU_TARGETS=$ROCM_DOCKER_ARCH -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DCMAKE_BUILD_TYPE=Release -DLLAMA_CURL=ON \
     && cmake --build build --config Release -j$(nproc)

 RUN mkdir -p /app/lib \

.devops/vulkan.Dockerfile

Lines changed: 1 addition & 1 deletion

@@ -16,7 +16,7 @@ WORKDIR /app

 COPY . .

-RUN cmake -B build -DGGML_NATIVE=OFF -DGGML_VULKAN=1 -DLLAMA_CURL=1 && \
+RUN cmake -B build -DGGML_NATIVE=OFF -DGGML_VULKAN=1 -DLLAMA_CURL=1 -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON && \
     cmake --build build --config Release -j$(nproc)

 RUN mkdir -p /app/lib && \
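
Taken together, the five container builds above now pass the same new flags: -DLLAMA_CURL=ON (libcurl support, e.g. for downloading models), -DGGML_BACKEND_DL=ON (backends built as dynamically loadable libraries), and -DGGML_CPU_ALL_VARIANTS=ON (CPU kernels compiled for every supported instruction-set variant, with the best one picked at runtime). A minimal sketch of what a GGML_BACKEND_DL=ON build looks like from the caller's side, assuming the ggml-backend registry API from this tree; error handling omitted:

    // Sketch only: enumerate devices after loading dynamic backends.
    // ggml_backend_load_all() scans for ggml backend shared libraries; the
    // device-registry calls below are the public ggml-backend API.
    #include "ggml-backend.h"

    #include <cstdio>

    int main() {
        ggml_backend_load_all(); // load every backend .so/.dll that was built

        for (size_t i = 0; i < ggml_backend_dev_count(); i++) {
            ggml_backend_dev_t dev = ggml_backend_dev_get(i);
            std::printf("device %zu: %s (%s)\n", i,
                        ggml_backend_dev_name(dev),
                        ggml_backend_dev_description(dev));
        }
        return 0;
    }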

ggml/src/ggml-cann/acl_tensor.cpp

Lines changed: 2 additions & 0 deletions

@@ -41,6 +41,8 @@ aclDataType ggml_cann_type_mapping(ggml_type type) {
             return ACL_INT4;
         case GGML_TYPE_Q8_0:
             return ACL_INT8;
+        case GGML_TYPE_I64:
+            return ACL_INT64;
         default:
             return ACL_DT_UNDEFINED;
     }
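
A note on why this mapping matters: GGML_TYPE_I64 previously fell through to ACL_DT_UNDEFINED, and the GGML_OP_COUNT_EQUAL support added below produces an I64 result tensor, so the CANN backend presumably needs a defined ACL type for it. An illustrative call (hypothetical call site):

    // After this change, I64 tensors can be described to ACL:
    aclDataType dt = ggml_cann_type_mapping(GGML_TYPE_I64); // ACL_INT64, was ACL_DT_UNDEFINED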

ggml/src/ggml-cann/aclnn_ops.cpp

Lines changed: 84 additions & 0 deletions

@@ -59,6 +59,11 @@
 #include <aclnnop/aclnn_div.h>
 #include <aclnnop/aclnn_convolution.h>
 #include <aclnnop/aclnn_elu.h>
+#include <aclnnop/aclnn_log.h>
+#include <aclnnop/aclnn_mean.h>
+#include <aclnnop/aclnn_reflection_pad1d.h>
+#include <aclnnop/aclnn_eq_tensor.h>
+#include <aclnnop/aclnn_gt_scalar.h>
 #include <float.h>

 #include <cmath>
@@ -2598,6 +2603,7 @@ void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
     aclTensor* acl_dst = ggml_cann_create_tensor(dst, dst->ne, dst->nb, 3);

     GGML_CANN_CALL_ACLNN_OP(ArgMax, acl_src, 3, false, acl_dst);
+
     ACL_CHECK(aclDestroyTensor(acl_src));
     ACL_CHECK(aclDestroyTensor(acl_dst));
 }
@@ -2629,6 +2635,9 @@ void ggml_cann_conv_transpose_1d(ggml_backend_cann_context& ctx, ggml_tensor* ds

     ACL_CHECK(aclDestroyTensor(acl_weight));
     ACL_CHECK(aclDestroyTensor(acl_dst));
+    ACL_CHECK(aclDestroyIntArray(stride));
+    ACL_CHECK(aclDestroyIntArray(padding));
+    ACL_CHECK(aclDestroyIntArray(dilation));
 }

 void ggml_cann_elu(ggml_backend_cann_context& ctx, ggml_tensor* dst){
@@ -2646,4 +2655,79 @@ void ggml_cann_elu(ggml_backend_cann_context& ctx, ggml_tensor* dst){

     ACL_CHECK(aclDestroyTensor(acl_input));
     ACL_CHECK(aclDestroyTensor(acl_dst));
+    ACL_CHECK(aclDestroyScalar(alpha));
+}
+
+void ggml_cann_mean(ggml_backend_cann_context& ctx, ggml_tensor* dst){
+    ggml_tensor * src0 = dst->src[0];
+
+    aclTensor* acl_src = ggml_cann_create_tensor(src0);
+    aclTensor* acl_dst = ggml_cann_create_tensor(dst);
+
+    int64_t reduceDimValue[] = {3};
+    aclIntArray* reduceDim = aclCreateIntArray(reduceDimValue, 1);
+    bool keepDim = true;
+
+    GGML_CANN_CALL_ACLNN_OP(Mean, acl_src, reduceDim, keepDim, ACL_FLOAT, acl_dst);
+
+    ACL_CHECK(aclDestroyTensor(acl_src));
+    ACL_CHECK(aclDestroyTensor(acl_dst));
+    ACL_CHECK(aclDestroyIntArray(reduceDim));
+}
+
+void ggml_cann_pad_reflect_1d(ggml_backend_cann_context& ctx, ggml_tensor* dst){
+    ggml_tensor * src0 = dst->src[0];
+    int32_t *opts = (int32_t *) dst->op_params;
+    int64_t paddingsArray[2] = {opts[0], opts[1]};
+    aclIntArray* paddings = aclCreateIntArray(paddingsArray, 2);
+
+    for (int64_t i = 0; i < src0->ne[3]; i++) {
+        aclTensor* acl_src = ggml_cann_create_tensor(
+            (char*)src0->data + i * src0->ne[3],
+            ggml_cann_type_mapping(src0->type), ggml_element_size(src0),
+            src0->ne, src0->nb, 3);
+
+        aclTensor* acl_dst = ggml_cann_create_tensor(
+            (char*)dst->data + i * src0->ne[3],
+            ggml_cann_type_mapping(dst->type), ggml_element_size(dst),
+            dst->ne, dst->nb, 3);
+
+        GGML_CANN_CALL_ACLNN_OP(ReflectionPad1d, acl_src, paddings, acl_dst);
+
+        ACL_CHECK(aclDestroyTensor(acl_src));
+        ACL_CHECK(aclDestroyTensor(acl_dst));
+    }
+    ACL_CHECK(aclDestroyIntArray(paddings));
+}
+
+void ggml_cann_count_equal(ggml_backend_cann_context& ctx, ggml_tensor* dst){
+    ggml_tensor * src0 = dst->src[0];
+    ggml_tensor * src1 = dst->src[1];
+
+    aclTensor* acl_self = ggml_cann_create_tensor(src0);
+    aclTensor* acl_other = ggml_cann_create_tensor(src1);
+
+    GGML_CANN_CALL_ACLNN_OP(InplaceEqTensor, acl_self, acl_other);
+
+    ggml_cann_sum(ctx, dst);
+
+    ACL_CHECK(aclDestroyTensor(acl_self));
+    ACL_CHECK(aclDestroyTensor(acl_other));
+}
+
+void ggml_cann_step(ggml_backend_cann_context& ctx, ggml_tensor* dst){
+    ggml_tensor * src0 = dst->src[0];
+
+    aclTensor* acl_src = ggml_cann_create_tensor(src0);
+    aclTensor* acl_dst = ggml_cann_create_tensor(dst);
+
+    float alphaValue = 0.0f;
+    aclScalar* alpha = nullptr;
+    alpha = aclCreateScalar(&alphaValue, aclDataType::ACL_FLOAT);
+
+    GGML_CANN_CALL_ACLNN_OP(GtScalar, acl_src, alpha, acl_dst);
+
+    ACL_CHECK(aclDestroyTensor(acl_src));
+    ACL_CHECK(aclDestroyTensor(acl_dst));
+    ACL_CHECK(aclDestroyScalar(alpha));
 }
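
For readers unfamiliar with the two less obvious new ops, here is a plain CPU sketch of their semantics (reference loops for illustration only, not part of ggml): GGML_UNARY_OP_STEP is a greater-than-zero test, matching the GtScalar call with alpha = 0.0f above, and GGML_OP_COUNT_EQUAL is an element-wise equality test followed by a sum, matching the InplaceEqTensor + ggml_cann_sum pairing.

    #include <cstddef>
    #include <cstdint>

    // step(x) = 1.0 if x > 0 else 0.0 — what GtScalar(acl_src, 0.0f) computes.
    static void step_ref(const float * src, float * dst, size_t n) {
        for (size_t i = 0; i < n; i++) {
            dst[i] = src[i] > 0.0f ? 1.0f : 0.0f;
        }
    }

    // count_equal(a, b) = number of positions where a[i] == b[i],
    // i.e. element-wise equality followed by a sum over all elements.
    static int64_t count_equal_ref(const float * a, const float * b, size_t n) {
        int64_t count = 0;
        for (size_t i = 0; i < n; i++) {
            count += (a[i] == b[i]) ? 1 : 0;
        }
        return count;
    }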

ggml/src/ggml-cann/aclnn_ops.h

Lines changed: 63 additions & 0 deletions

@@ -42,6 +42,8 @@
 #include <aclnnop/aclnn_sqrt.h>
 #include <aclnnop/aclnn_sin.h>
 #include <aclnnop/aclnn_cos.h>
+#include <aclnnop/aclnn_log.h>
+#include <aclnnop/aclnn_sign.h>
 #include "acl_tensor.h"
 #include "common.h"

@@ -650,6 +652,67 @@ void ggml_cann_conv_transpose_1d(ggml_backend_cann_context& ctx, ggml_tensor* ds
  */
 void ggml_cann_elu(ggml_backend_cann_context& ctx, ggml_tensor* dst);

+/**
+ * @brief Computes the mean of a ggml tensor element-wise using the CANN backend.
+ *
+ * @details This function calculates the element-wise mean of the input tensor.
+ * The result is written to the destination tensor `dst`.
+ * The mean is computed by averaging the values across the entire tensor.
+ *
+ * This operation is optimized using the CANN backend for high-performance inference or training.
+ *
+ * @param ctx The CANN context used for operations.
+ * @param dst The destination tensor where the mean result will be stored.
+ *            dst->op is expected to be `GGML_OP_MEAN`.
+ */
+void ggml_cann_mean(ggml_backend_cann_context& ctx, ggml_tensor* dst);
+
+/**
+ * @brief Applies 1D reflect padding to a ggml tensor using the CANN backend.
+ *
+ * @details This function performs 1D reflect padding on the input tensor.
+ * The amount of padding on each side is specified by parameters stored in `dst->op_params`.
+ * The operation reflects the values at the borders of the tensor to generate the padded output.
+ *
+ * This operation is optimized using the CANN backend for high-performance inference or training.
+ *
+ * @param ctx The CANN context used for operations.
+ * @param dst The destination tensor where the padded result will be stored.
+ *            dst->op is expected to be `GGML_OP_PAD_REFLECT_1D`.
+ */
+void ggml_cann_pad_reflect_1d(ggml_backend_cann_context& ctx, ggml_tensor* dst);
+
+/**
+ * @brief Counts the number of equal elements in two ggml tensors using the CANN backend.
+ *
+ * @details This function performs an element-wise comparison between two input tensors,
+ * and counts the number of positions where the elements are equal. The result is
+ * stored in the destination tensor `dst` as a scalar.
+ *
+ * The operation is optimized using the CANN backend, making it suitable for
+ * high-performance inference or training scenarios.
+ *
+ * @param ctx The CANN context used for operations.
+ * @param dst The destination tensor where the result will be stored.
+ *            dst->op is expected to be `GGML_OP_COUNT_EQUAL`.
+ */
+void ggml_cann_count_equal(ggml_backend_cann_context& ctx, ggml_tensor* dst);
+
+/**
+ * @brief Applies the Step activation function to a ggml tensor using the CANN backend.
+ *
+ * @details This function applies a step function element-wise to the input tensor, where
+ * each element is transformed to 1.0 if it is greater than 0, and 0.0 otherwise.
+ * The result is stored in the destination tensor `dst`.
+ *
+ * This operation is accelerated using the CANN backend to improve runtime performance.
+ *
+ * @param ctx The CANN context used for operations.
+ * @param dst The destination tensor where the result will be stored.
+ *            dst->op is expected to be `GGML_OP_STEP`.
+ */
+void ggml_cann_step(ggml_backend_cann_context& ctx, ggml_tensor* dst);
+
 /**
  * @brief Applies a element-wise operation to two input tensors using the CANN
  * backend.

ggml/src/ggml-cann/ggml-cann.cpp

Lines changed: 24 additions & 0 deletions

@@ -1358,6 +1358,12 @@ static bool ggml_cann_compute_forward(ggml_backend_cann_context& ctx,
             case GGML_UNARY_OP_ELU:
                 ggml_cann_elu(ctx, dst);
                 break;
+            case GGML_UNARY_OP_SGN:
+                GGML_CANN_CALL_UNARY_OP(Sign);
+                break;
+            case GGML_UNARY_OP_STEP:
+                ggml_cann_step(ctx, dst);
+                break;
             default:
                 return false;
         }
@@ -1456,6 +1462,18 @@ static bool ggml_cann_compute_forward(ggml_backend_cann_context& ctx,
         case GGML_OP_CONV_TRANSPOSE_1D:
             ggml_cann_conv_transpose_1d(ctx, dst);
             break;
+        case GGML_OP_LOG:
+            GGML_CANN_CALL_UNARY_OP(Log);
+            break;
+        case GGML_OP_MEAN:
+            ggml_cann_mean(ctx, dst);
+            break;
+        case GGML_OP_PAD_REFLECT_1D:
+            ggml_cann_pad_reflect_1d(ctx, dst);
+            break;
+        case GGML_OP_COUNT_EQUAL:
+            ggml_cann_count_equal(ctx, dst);
+            break;
         default:
             return false;
     }
@@ -1718,6 +1736,8 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev,
             case GGML_UNARY_OP_TANH:
             case GGML_UNARY_OP_EXP:
             case GGML_UNARY_OP_ELU:
+            case GGML_UNARY_OP_SGN:
+            case GGML_UNARY_OP_STEP:
                 return true;
             default:
                 return false;
@@ -1851,6 +1871,10 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev,
         case GGML_OP_COS:
         case GGML_OP_SIN:
         case GGML_OP_CONV_TRANSPOSE_1D:
+        case GGML_OP_LOG:
+        case GGML_OP_MEAN:
+        case GGML_OP_PAD_REFLECT_1D:
+        case GGML_OP_COUNT_EQUAL:
            return true;
        default:
            return false;
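
With the dispatch cases and supports_op entries in place, graphs containing these operators can be offloaded to the CANN backend rather than falling back to the CPU. A minimal sketch of such a graph using the public ggml builders (ggml_log, ggml_mean, and ggml_step are existing ggml API; context setup, allocation, and backend scheduling are omitted):

    #include "ggml.h"

    // Build a tiny graph exercising three of the newly supported operators.
    static struct ggml_tensor * build_demo(struct ggml_context * ctx, struct ggml_tensor * x) {
        struct ggml_tensor * l = ggml_log(ctx, x);   // GGML_OP_LOG
        struct ggml_tensor * m = ggml_mean(ctx, l);  // GGML_OP_MEAN
        struct ggml_tensor * s = ggml_step(ctx, m);  // GGML_UNARY_OP_STEP
        return s;
    }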

ggml/src/ggml-cpu/ggml-cpu-impl.h

Lines changed: 0 additions & 2 deletions

@@ -323,8 +323,6 @@ inline static int32x4_t ggml_vdotq_s32(int32x4_t acc, int8x16_t a, int8x16_t b)
 #else
 #ifdef __POWER9_VECTOR__
 #include <altivec.h>
-#undef bool
-#define bool _Bool
 #else
 #if defined(_MSC_VER) || defined(__MINGW32__)
 #include <intrin.h>
