Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
10 changes: 5 additions & 5 deletions .devops/cloud-v-pipeline
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,16 @@ node('x86_runner1'){ // Running on x86 runner containing latest vecto
checkout scm // Clone the repo on Runner
}
}
stage('Compiling llama.cpp'){
stage('Compiling jarvis.cpp'){
sh'''#!/bin/bash
make RISCV=1 RISCV_CROSS_COMPILE=1 # Compiling llama for RISC-V
make RISCV=1 RISCV_CROSS_COMPILE=1 # Compiling jarvis for RISC-V
'''
}
stage('Running llama.cpp'){
stage('Running jarvis.cpp'){
sh'''#!/bin/bash
module load gnu-bin2/0.1 # loading latest versions of vector qemu and vector gcc
qemu-riscv64 -L /softwares/gnu-bin2/sysroot -cpu rv64,v=true,vlen=256,elen=64,vext_spec=v1.0 ./llama-cli -m /home/alitariq/codellama-7b.Q4_K_M.gguf -p "Anything" -n 9 > llama_log.txt # Running llama.cpp on vector qemu-riscv64
cat llama_log.txt # Printing results
qemu-riscv64 -L /softwares/gnu-bin2/sysroot -cpu rv64,v=true,vlen=256,elen=64,vext_spec=v1.0 ./jarvis-cli -m /home/alitariq/codejarvis-7b.Q4_K_M.gguf -p "Anything" -n 9 > jarvis_log.txt # Running jarvis.cpp on vector qemu-riscv64
cat jarvis_log.txt # Printing results
'''
}
}
2 changes: 1 addition & 1 deletion .devops/full-cuda.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ COPY . .
RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \
export CMAKE_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH}"; \
fi && \
cmake -B build -DGGML_CUDA=ON -DLLAMA_CURL=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
cmake -B build -DGGML_CUDA=ON -DJARVIS_CURL=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
cmake --build build --config Release -j$(nproc) && \
cp build/bin/* .

Expand Down
2 changes: 1 addition & 1 deletion .devops/full-musa.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ WORKDIR /app

COPY . .

RUN cmake -B build -DGGML_MUSA=ON -DLLAMA_CURL=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
RUN cmake -B build -DGGML_MUSA=ON -DJARVIS_CURL=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
cmake --build build --config Release -j$(nproc) && \
cp build/bin/* .

Expand Down
4 changes: 2 additions & 2 deletions .devops/full-rocm.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-co
FROM ${BASE_ROCM_DEV_CONTAINER} AS build

# Unless otherwise specified, we make a fat build.
# List from https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878
# List from https://github.com/ggerganov/jarvis.cpp/pull/1087#issuecomment-1682807878
# This is mostly tied to rocBLAS supported archs.
ARG ROCM_DOCKER_ARCH="\
gfx803 \
Expand Down Expand Up @@ -41,7 +41,7 @@ ENV CC=/opt/rocm/llvm/bin/clang
ENV CXX=/opt/rocm/llvm/bin/clang++

# Enable cURL
ENV LLAMA_CURL=1
ENV JARVIS_CURL=1
RUN apt-get update && \
apt-get install -y libcurl4-openssl-dev

Expand Down
2 changes: 1 addition & 1 deletion .devops/full.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ WORKDIR /app

COPY . .

ENV LLAMA_CURL=1
ENV JARVIS_CURL=1


RUN make -j$(nproc)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,11 @@ ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/runtime/lib64/stub:$LD_LIBRARY_PATH
RUN echo "Building with static libs" && \
source /usr/local/Ascend/ascend-toolkit/set_env.sh --force && \
cmake -B build -DGGML_CANN=ON -DBUILD_SHARED_LIBS=OFF && \
cmake --build build --config Release --target llama-cli
cmake --build build --config Release --target jarvis-cli

# TODO: use image with NNRT
FROM cosdt/cann:$ASCEND_VERSION AS runtime
COPY --from=build /app/build/bin/llama-cli /llama-cli
COPY --from=build /app/build/bin/jarvis-cli /jarvis-cli

ENV LC_ALL=C.utf8

Expand All @@ -41,4 +41,4 @@ ENV ASCEND_OPP_PATH=${ASCEND_TOOLKIT_HOME}/opp
ENV TOOLCHAIN_HOME=${ASCEND_TOOLKIT_HOME}/toolkit
ENV ASCEND_HOME_PATH=${ASCEND_TOOLKIT_HOME}

ENTRYPOINT ["/llama-cli" ]
ENTRYPOINT ["/jarvis-cli" ]
Original file line number Diff line number Diff line change
Expand Up @@ -23,15 +23,15 @@ RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \
export CMAKE_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH}"; \
fi && \
cmake -B build -DGGML_CUDA=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
cmake --build build --config Release --target llama-cli -j$(nproc)
cmake --build build --config Release --target jarvis-cli -j$(nproc)

FROM ${BASE_CUDA_RUN_CONTAINER} AS runtime

RUN apt-get update && \
apt-get install -y libgomp1

COPY --from=build /app/build/ggml/src/libggml.so /libggml.so
COPY --from=build /app/build/src/libllama.so /libllama.so
COPY --from=build /app/build/bin/llama-cli /llama-cli
COPY --from=build /app/build/src/libjarvis.so /libjarvis.so
COPY --from=build /app/build/bin/jarvis-cli /jarvis-cli

ENTRYPOINT [ "/llama-cli" ]
ENTRYPOINT [ "/jarvis-cli" ]
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,12 @@ RUN if [ "${GGML_SYCL_F16}" = "ON" ]; then \
echo "Building with static libs" && \
cmake -B build -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx \
${OPT_SYCL_F16} -DBUILD_SHARED_LIBS=OFF && \
cmake --build build --config Release --target llama-cli
cmake --build build --config Release --target jarvis-cli

FROM intel/oneapi-basekit:$ONEAPI_VERSION AS runtime

COPY --from=build /app/build/bin/llama-cli /llama-cli
COPY --from=build /app/build/bin/jarvis-cli /jarvis-cli

ENV LC_ALL=C.utf8

ENTRYPOINT [ "/llama-cli" ]
ENTRYPOINT [ "/jarvis-cli" ]
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,15 @@ WORKDIR /app
COPY . .

RUN cmake -B build -DGGML_MUSA=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
cmake --build build --config Release --target llama-cli -j$(nproc)
cmake --build build --config Release --target jarvis-cli -j$(nproc)

FROM ${BASE_MUSA_RUN_CONTAINER} AS runtime

RUN apt-get update && \
apt-get install -y libgomp1

COPY --from=build /app/build/ggml/src/libggml.so /libggml.so
COPY --from=build /app/build/src/libllama.so /libllama.so
COPY --from=build /app/build/bin/llama-cli /llama-cli
COPY --from=build /app/build/src/libjarvis.so /libjarvis.so
COPY --from=build /app/build/bin/jarvis-cli /jarvis-cli

ENTRYPOINT [ "/llama-cli" ]
ENTRYPOINT [ "/jarvis-cli" ]
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-co
FROM ${BASE_ROCM_DEV_CONTAINER} AS build

# Unless otherwise specified, we make a fat build.
# List from https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878
# List from https://github.com/ggerganov/jarvis.cpp/pull/1087#issuecomment-1682807878
# This is mostly tied to rocBLAS supported archs.
ARG ROCM_DOCKER_ARCH="\
gfx803 \
Expand Down Expand Up @@ -40,6 +40,6 @@ ENV GGML_HIPBLAS=1
ENV CC=/opt/rocm/llvm/bin/clang
ENV CXX=/opt/rocm/llvm/bin/clang++

RUN make -j$(nproc) llama-cli
RUN make -j$(nproc) jarvis-cli

ENTRYPOINT [ "/app/llama-cli" ]
ENTRYPOINT [ "/app/jarvis-cli" ]
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,13 @@ RUN wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key
WORKDIR /app
COPY . .
RUN cmake -B build -DGGML_VULKAN=1 && \
cmake --build build --config Release --target llama-cli
cmake --build build --config Release --target jarvis-cli

# Clean up
WORKDIR /
RUN cp /app/build/bin/llama-cli /llama-cli && \
RUN cp /app/build/bin/jarvis-cli /jarvis-cli && \
rm -rf /app

ENV LC_ALL=C.utf8

ENTRYPOINT [ "/llama-cli" ]
ENTRYPOINT [ "/jarvis-cli" ]
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,15 @@ WORKDIR /app

COPY . .

RUN make -j$(nproc) llama-cli
RUN make -j$(nproc) jarvis-cli

FROM ubuntu:$UBUNTU_VERSION AS runtime

RUN apt-get update && \
apt-get install -y libgomp1

COPY --from=build /app/llama-cli /llama-cli
COPY --from=build /app/jarvis-cli /jarvis-cli

ENV LC_ALL=C.utf8

ENTRYPOINT [ "/llama-cli" ]
ENTRYPOINT [ "/jarvis-cli" ]
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# Built and maintained by John Boero - [email protected]
# In honor of Seth Vidal https://www.redhat.com/it/blog/thank-you-seth-vidal

# Notes for llama.cpp:
# Notes for jarvis.cpp:
# 1. Tags are currently based on hash - which will not sort asciibetically.
# We need to declare standard versioning if people want to sort latest releases.
# 2. Builds for CUDA/OpenCL support are separate, with different dependencies.
Expand All @@ -12,44 +12,44 @@
# 4. OpenCL/CLBLAST support simply requires the ICD loader and basic opencl libraries.
# It is up to the user to install the correct vendor-specific support.

Name: llama.cpp-cuda
Name: jarvis.cpp-cuda
Version: %( date "+%%Y%%m%%d" )
Release: 1%{?dist}
Summary: CPU Inference of LLaMA model in pure C/C++ (no CUDA/OpenCL)
Summary: CPU Inference of JARVIS model in pure C/C++ (no CUDA/OpenCL)
License: MIT
Source0: https://github.com/ggerganov/llama.cpp/archive/refs/heads/master.tar.gz
Source0: https://github.com/ggerganov/jarvis.cpp/archive/refs/heads/master.tar.gz
BuildRequires: coreutils make gcc-c++ git cuda-toolkit
Requires: cuda-toolkit
URL: https://github.com/ggerganov/llama.cpp
URL: https://github.com/ggerganov/jarvis.cpp

%define debug_package %{nil}
%define source_date_epoch_from_changelog 0

%description
CPU inference for Meta's Lllama2 models using default options.
CPU inference for Meta's Jarvis2 models using default options.

%prep
%setup -n llama.cpp-master
%setup -n jarvis.cpp-master

%build
make -j GGML_CUDA=1

%install
mkdir -p %{buildroot}%{_bindir}/
cp -p llama-cli %{buildroot}%{_bindir}/llama-cuda-cli
cp -p llama-server %{buildroot}%{_bindir}/llama-cuda-server
cp -p llama-simple %{buildroot}%{_bindir}/llama-cuda-simple
cp -p jarvis-cli %{buildroot}%{_bindir}/jarvis-cuda-cli
cp -p jarvis-server %{buildroot}%{_bindir}/jarvis-cuda-server
cp -p jarvis-simple %{buildroot}%{_bindir}/jarvis-cuda-simple

mkdir -p %{buildroot}/usr/lib/systemd/system
%{__cat} <<EOF > %{buildroot}/usr/lib/systemd/system/llamacuda.service
%{__cat} <<EOF > %{buildroot}/usr/lib/systemd/system/jarviscuda.service
[Unit]
Description=Llama.cpp server, CPU only (no GPU support in this build).
Description=Jarvis.cpp server, CPU only (no GPU support in this build).
After=syslog.target network.target local-fs.target remote-fs.target nss-lookup.target

[Service]
Type=simple
EnvironmentFile=/etc/sysconfig/llama
ExecStart=/usr/bin/llama-cuda-server $LLAMA_ARGS
EnvironmentFile=/etc/sysconfig/jarvis
ExecStart=/usr/bin/jarvis-cuda-server $JARVIS_ARGS
ExecReload=/bin/kill -s HUP $MAINPID
Restart=never

Expand All @@ -58,20 +58,20 @@ WantedBy=default.target
EOF

mkdir -p %{buildroot}/etc/sysconfig
%{__cat} <<EOF > %{buildroot}/etc/sysconfig/llama
LLAMA_ARGS="-m /opt/llama2/ggml-model-f32.bin"
%{__cat} <<EOF > %{buildroot}/etc/sysconfig/jarvis
JARVIS_ARGS="-m /opt/jarvis2/ggml-model-f32.bin"
EOF

%clean
rm -rf %{buildroot}
rm -rf %{_builddir}/*

%files
%{_bindir}/llama-cuda-cli
%{_bindir}/llama-cuda-server
%{_bindir}/llama-cuda-simple
/usr/lib/systemd/system/llamacuda.service
%config /etc/sysconfig/llama
%{_bindir}/jarvis-cuda-cli
%{_bindir}/jarvis-cuda-server
%{_bindir}/jarvis-cuda-simple
/usr/lib/systemd/system/jarviscuda.service
%config /etc/sysconfig/jarvis

%pre

Expand Down
42 changes: 21 additions & 21 deletions .devops/llama-cpp.srpm.spec → .devops/jarvis-cpp.srpm.spec
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# Built and maintained by John Boero - [email protected]
# In honor of Seth Vidal https://www.redhat.com/it/blog/thank-you-seth-vidal

# Notes for llama.cpp:
# Notes for jarvis.cpp:
# 1. Tags are currently based on hash - which will not sort asciibetically.
# We need to declare standard versioning if people want to sort latest releases.
# In the meantime, YYYYMMDD format will be used.
Expand All @@ -13,45 +13,45 @@
# 4. OpenCL/CLBLAST support simply requires the ICD loader and basic opencl libraries.
# It is up to the user to install the correct vendor-specific support.

Name: llama.cpp
Name: jarvis.cpp
Version: %( date "+%%Y%%m%%d" )
Release: 1%{?dist}
Summary: CPU Inference of LLaMA model in pure C/C++ (no CUDA/OpenCL)
Summary: CPU Inference of JARVIS model in pure C/C++ (no CUDA/OpenCL)
License: MIT
Source0: https://github.com/ggerganov/llama.cpp/archive/refs/heads/master.tar.gz
Source0: https://github.com/ggerganov/jarvis.cpp/archive/refs/heads/master.tar.gz
BuildRequires: coreutils make gcc-c++ git libstdc++-devel
Requires: libstdc++
URL: https://github.com/ggerganov/llama.cpp
URL: https://github.com/ggerganov/jarvis.cpp

%define debug_package %{nil}
%define source_date_epoch_from_changelog 0

%description
CPU inference for Meta's Lllama2 models using default options.
CPU inference for Meta's Jarvis2 models using default options.
Models are not included in this package and must be downloaded separately.

%prep
%setup -n llama.cpp-master
%setup -n jarvis.cpp-master

%build
make -j

%install
mkdir -p %{buildroot}%{_bindir}/
cp -p llama-cli %{buildroot}%{_bindir}/llama-cli
cp -p llama-server %{buildroot}%{_bindir}/llama-server
cp -p llama-simple %{buildroot}%{_bindir}/llama-simple
cp -p jarvis-cli %{buildroot}%{_bindir}/jarvis-cli
cp -p jarvis-server %{buildroot}%{_bindir}/jarvis-server
cp -p jarvis-simple %{buildroot}%{_bindir}/jarvis-simple

mkdir -p %{buildroot}/usr/lib/systemd/system
%{__cat} <<EOF > %{buildroot}/usr/lib/systemd/system/llama.service
%{__cat} <<EOF > %{buildroot}/usr/lib/systemd/system/jarvis.service
[Unit]
Description=Llama.cpp server, CPU only (no GPU support in this build).
Description=Jarvis.cpp server, CPU only (no GPU support in this build).
After=syslog.target network.target local-fs.target remote-fs.target nss-lookup.target

[Service]
Type=simple
EnvironmentFile=/etc/sysconfig/llama
ExecStart=/usr/bin/llama-server $LLAMA_ARGS
EnvironmentFile=/etc/sysconfig/jarvis
ExecStart=/usr/bin/jarvis-server $JARVIS_ARGS
ExecReload=/bin/kill -s HUP $MAINPID
Restart=never

Expand All @@ -60,20 +60,20 @@ WantedBy=default.target
EOF

mkdir -p %{buildroot}/etc/sysconfig
%{__cat} <<EOF > %{buildroot}/etc/sysconfig/llama
LLAMA_ARGS="-m /opt/llama2/ggml-model-f32.bin"
%{__cat} <<EOF > %{buildroot}/etc/sysconfig/jarvis
JARVIS_ARGS="-m /opt/jarvis2/ggml-model-f32.bin"
EOF

%clean
rm -rf %{buildroot}
rm -rf %{_builddir}/*

%files
%{_bindir}/llama-cli
%{_bindir}/llama-server
%{_bindir}/llama-simple
/usr/lib/systemd/system/llama.service
%config /etc/sysconfig/llama
%{_bindir}/jarvis-cli
%{_bindir}/jarvis-server
%{_bindir}/jarvis-simple
/usr/lib/systemd/system/jarvis.service
%config /etc/sysconfig/jarvis

%pre

Expand Down
Loading