Skip to content

Commit 0c561e6

Browse files
authored
Merge branch 'ggml-org:master' into master
2 parents e5a93bd + 1fe4e38 commit 0c561e6

File tree

22 files changed

+472
-260
lines changed

22 files changed

+472
-260
lines changed

.devops/rocm.Dockerfile

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
ARG UBUNTU_VERSION=24.04
22

33
# This needs to generally match the container host's environment.
4-
ARG ROCM_VERSION=6.4
5-
ARG AMDGPU_VERSION=6.4
4+
ARG ROCM_VERSION=7.0
5+
ARG AMDGPU_VERSION=7.0
66

77
# Target the ROCm build image
88
ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete
@@ -13,11 +13,10 @@ FROM ${BASE_ROCM_DEV_CONTAINER} AS build
1313
# Unless otherwise specified, we make a fat build.
1414
# List from https://github.com/ggml-org/llama.cpp/pull/1087#issuecomment-1682807878
1515
# This is mostly tied to rocBLAS supported archs.
16-
# gfx803, gfx900, gfx1032, gfx1101, gfx1102,not officialy supported
17-
# gfx906 is deprecated
18-
#check https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.4.1/reference/system-requirements.html
16+
# gfx803, gfx900, gfx906, gfx1032, gfx1101, gfx1102,not officialy supported
17+
# check https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.4.1/reference/system-requirements.html
1918

20-
ARG ROCM_DOCKER_ARCH='gfx803;gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1010;gfx1030;gfx1032;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201;gfx1151'
19+
ARG ROCM_DOCKER_ARCH='gfx803;gfx900;gfx906;gfx1010;gfx1030;gfx1032;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201;gfx1151'
2120
#ARG ROCM_DOCKER_ARCH='gfx1151'
2221

2322
# Set ROCm architectures
@@ -36,13 +35,10 @@ WORKDIR /app
3635

3736
COPY . .
3837

39-
RUN git clone https://github.com/rocm/rocwmma --branch develop --depth 1
40-
4138
RUN HIPCXX="$(hipconfig -l)/clang" HIP_PATH="$(hipconfig -R)" \
4239
cmake -S . -B build \
4340
-DGGML_HIP=ON \
4441
-DGGML_HIP_ROCWMMA_FATTN=ON \
45-
-DCMAKE_HIP_FLAGS="-I$(pwd)/rocwmma/library/include/" \
4642
-DAMDGPU_TARGETS="$ROCM_DOCKER_ARCH" \
4743
-DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON \
4844
-DCMAKE_BUILD_TYPE=Release -DLLAMA_BUILD_TESTS=OFF \

.github/workflows/build.yml

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -487,7 +487,7 @@ jobs:
487487
id: depends
488488
run: |
489489
sudo apt-get update
490-
sudo apt-get install -y build-essential git cmake rocblas-dev hipblas-dev libcurl4-openssl-dev
490+
sudo apt-get install -y build-essential git cmake rocblas-dev hipblas-dev libcurl4-openssl-dev rocwmma-dev
491491
492492
- name: ccache
493493
uses: ggml-org/ccache-action@v1.2.16
@@ -1097,10 +1097,12 @@ jobs:
10971097
id: checkout
10981098
uses: actions/checkout@v4
10991099

1100-
- name: Clone rocWMMA repository
1101-
id: clone_rocwmma
1100+
- name: Grab rocWMMA package
1101+
id: grab_rocwmma
11021102
run: |
1103-
git clone https://github.com/rocm/rocwmma --branch rocm-${{ env.ROCM_VERSION }} --depth 1
1103+
curl -o rocwmma.deb "https://repo.radeon.com/rocm/apt/${{ env.ROCM_VERSION }}/pool/main/r/rocwmma-dev/rocwmma-dev_1.7.0.60402-120~24.04_amd64.deb"
1104+
7z x rocwmma.deb
1105+
7z x data.tar
11041106
11051107
- name: Cache ROCm Installation
11061108
id: cache-rocm
@@ -1161,8 +1163,9 @@ jobs:
11611163
cmake -G "Unix Makefiles" -B build -S . `
11621164
-DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" `
11631165
-DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" `
1164-
-DCMAKE_CXX_FLAGS="-I$($PWD.Path.Replace('\', '/'))/rocwmma/library/include/" `
1166+
-DCMAKE_CXX_FLAGS="-I$($PWD.Path.Replace('\', '/'))/opt/rocm-${{ env.ROCM_VERSION }}/include/" `
11651167
-DCMAKE_BUILD_TYPE=Release `
1168+
-DROCM_DIR="${env:HIP_PATH}" `
11661169
-DGGML_HIP=ON `
11671170
-DGGML_HIP_ROCWMMA_FATTN=ON `
11681171
-DGGML_RPC=ON `

.github/workflows/docker.yml

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -89,12 +89,15 @@ jobs:
8989
TYPE="-${{ matrix.config.tag }}"
9090
fi
9191
PREFIX="ghcr.io/${REPO_OWNER}/${REPO_NAME}:"
92+
CACHETAGS="${PREFIX}buildcache${TYPE}"
9293
FULLTAGS="${PREFIX}full${TYPE},${PREFIX}full${TYPE}-${{ steps.srctag.outputs.name }}"
9394
LIGHTTAGS="${PREFIX}light${TYPE},${PREFIX}light${TYPE}-${{ steps.srctag.outputs.name }}"
9495
SERVERTAGS="${PREFIX}server${TYPE},${PREFIX}server${TYPE}-${{ steps.srctag.outputs.name }}"
96+
echo "cache_output_tags=$CACHETAGS" >> $GITHUB_OUTPUT
9597
echo "full_output_tags=$FULLTAGS" >> $GITHUB_OUTPUT
9698
echo "light_output_tags=$LIGHTTAGS" >> $GITHUB_OUTPUT
9799
echo "server_output_tags=$SERVERTAGS" >> $GITHUB_OUTPUT
100+
echo "cache_output_tags=$CACHETAGS" # print out for debugging
98101
echo "full_output_tags=$FULLTAGS" # print out for debugging
99102
echo "light_output_tags=$LIGHTTAGS" # print out for debugging
100103
echo "server_output_tags=$SERVERTAGS" # print out for debugging
@@ -131,11 +134,14 @@ jobs:
131134
target: full
132135
provenance: false
133136
# using github experimental cache
134-
cache-from: type=gha
135-
cache-to: type=gha,mode=max
137+
#cache-from: type=gha
138+
#cache-to: type=gha,mode=max
136139
# return to this if the experimental github cache is having issues
137140
#cache-to: type=local,dest=/tmp/.buildx-cache
138141
#cache-from: type=local,src=/tmp/.buildx-cache
142+
# using registry cache (no storage limit)
143+
cache-from: type=registry,ref=${{ steps.tag.outputs.cache_output_tags }}
144+
cache-to: type=registry,ref=${{ steps.tag.outputs.cache_output_tags }},mode=max
139145

140146
- name: Build and push Light Docker image (tagged + versioned)
141147
if: ${{ (github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') && matrix.config.light == true }}
@@ -150,11 +156,14 @@ jobs:
150156
target: light
151157
provenance: false
152158
# using github experimental cache
153-
cache-from: type=gha
154-
cache-to: type=gha,mode=max
159+
#cache-from: type=gha
160+
#cache-to: type=gha,mode=max
155161
# return to this if the experimental github cache is having issues
156162
#cache-to: type=local,dest=/tmp/.buildx-cache
157163
#cache-from: type=local,src=/tmp/.buildx-cache
164+
# using registry cache (no storage limit)
165+
cache-from: type=registry,ref=${{ steps.tag.outputs.cache_output_tags }}
166+
cache-to: type=registry,ref=${{ steps.tag.outputs.cache_output_tags }},mode=max
158167

159168
- name: Build and push Server Docker image (tagged + versioned)
160169
if: ${{ (github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') && matrix.config.server == true }}
@@ -169,11 +178,14 @@ jobs:
169178
target: server
170179
provenance: false
171180
# using github experimental cache
172-
cache-from: type=gha
173-
cache-to: type=gha,mode=max
181+
#cache-from: type=gha
182+
#cache-to: type=gha,mode=max
174183
# return to this if the experimental github cache is having issues
175184
#cache-to: type=local,dest=/tmp/.buildx-cache
176185
#cache-from: type=local,src=/tmp/.buildx-cache
186+
# using registry cache (no storage limit)
187+
cache-from: type=registry,ref=${{ steps.tag.outputs.cache_output_tags }}
188+
cache-to: type=registry,ref=${{ steps.tag.outputs.cache_output_tags }},mode=max
177189

178190
create_tag:
179191
name: Create and push git tag

.github/workflows/release.yml

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -543,10 +543,12 @@ jobs:
543543
id: checkout
544544
uses: actions/checkout@v4
545545

546-
- name: Clone rocWMMA repository
547-
id: clone_rocwmma
546+
- name: Grab rocWMMA package
547+
id: grab_rocwmma
548548
run: |
549-
git clone https://github.com/rocm/rocwmma --branch develop --depth 1
549+
curl -o rocwmma.deb "https://repo.radeon.com/rocm/apt/7.0.1/pool/main/r/rocwmma-dev/rocwmma-dev_2.0.0.70001-42~24.04_amd64.deb"
550+
7z x rocwmma.deb
551+
7z x data.tar
550552
551553
- name: Cache ROCm Installation
552554
id: cache-rocm
@@ -601,7 +603,7 @@ jobs:
601603
cmake -G "Unix Makefiles" -B build -S . `
602604
-DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" `
603605
-DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" `
604-
-DCMAKE_CXX_FLAGS="-I$($PWD.Path.Replace('\', '/'))/rocwmma/library/include/ -Wno-ignored-attributes -Wno-nested-anon-types" `
606+
-DCMAKE_CXX_FLAGS="-I$($PWD.Path.Replace('\', '/'))/opt/rocm-7.0.1/include/ -Wno-ignored-attributes -Wno-nested-anon-types" `
605607
-DCMAKE_BUILD_TYPE=Release `
606608
-DGGML_BACKEND_DL=ON `
607609
-DGGML_NATIVE=OFF `

CODEOWNERS

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
/common/build-info.* @ggerganov
1515
/common/common.* @ggerganov
1616
/common/console.* @ggerganov
17+
/common/http.* @angt
1718
/common/llguidance.* @ggerganov
1819
/common/log.* @ggerganov
1920
/common/sampling.* @ggerganov

common/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ add_library(${TARGET} STATIC
5656
common.h
5757
console.cpp
5858
console.h
59+
http.h
5960
json-partial.cpp
6061
json-partial.h
6162
json-schema-to-grammar.cpp

common/arg.cpp

Lines changed: 4 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -32,13 +32,11 @@
3232
#include <thread>
3333
#include <vector>
3434

35-
//#define LLAMA_USE_CURL
36-
3735
#if defined(LLAMA_USE_CURL)
3836
#include <curl/curl.h>
3937
#include <curl/easy.h>
4038
#else
41-
#include <cpp-httplib/httplib.h>
39+
#include "http.h"
4240
#endif
4341

4442
#ifdef __linux__
@@ -596,77 +594,6 @@ std::pair<long, std::vector<char>> common_remote_get_content(const std::string &
596594

597595
#else
598596

599-
struct common_url {
600-
std::string scheme;
601-
std::string user;
602-
std::string password;
603-
std::string host;
604-
std::string path;
605-
};
606-
607-
static common_url parse_url(const std::string & url) {
608-
common_url parts;
609-
auto scheme_end = url.find("://");
610-
611-
if (scheme_end == std::string::npos) {
612-
throw std::runtime_error("invalid URL: no scheme");
613-
}
614-
parts.scheme = url.substr(0, scheme_end);
615-
616-
if (parts.scheme != "http" && parts.scheme != "https") {
617-
throw std::runtime_error("unsupported URL scheme: " + parts.scheme);
618-
}
619-
620-
auto rest = url.substr(scheme_end + 3);
621-
auto at_pos = rest.find('@');
622-
623-
if (at_pos != std::string::npos) {
624-
auto auth = rest.substr(0, at_pos);
625-
auto colon_pos = auth.find(':');
626-
if (colon_pos != std::string::npos) {
627-
parts.user = auth.substr(0, colon_pos);
628-
parts.password = auth.substr(colon_pos + 1);
629-
} else {
630-
parts.user = auth;
631-
}
632-
rest = rest.substr(at_pos + 1);
633-
}
634-
635-
auto slash_pos = rest.find('/');
636-
637-
if (slash_pos != std::string::npos) {
638-
parts.host = rest.substr(0, slash_pos);
639-
parts.path = rest.substr(slash_pos);
640-
} else {
641-
parts.host = rest;
642-
parts.path = "/";
643-
}
644-
return parts;
645-
}
646-
647-
static std::pair<httplib::Client, common_url> http_client(const std::string & url) {
648-
common_url parts = parse_url(url);
649-
650-
if (parts.host.empty()) {
651-
throw std::runtime_error("error: invalid URL format");
652-
}
653-
654-
if (!parts.user.empty()) {
655-
throw std::runtime_error("error: user:password@ not supported yet"); // TODO
656-
}
657-
658-
httplib::Client cli(parts.scheme + "://" + parts.host);
659-
cli.set_follow_location(true);
660-
661-
// TODO cert
662-
663-
return { std::move(cli), std::move(parts) };
664-
}
665-
666-
static std::string show_masked_url(const common_url & parts) {
667-
return parts.scheme + "://" + (parts.user.empty() ? "" : "****:****@") + parts.host + parts.path;
668-
}
669-
670597
static void print_progress(size_t current, size_t total) {
671598
if (!is_output_a_tty()) {
672599
return;
@@ -759,7 +686,7 @@ static bool common_download_file_single_online(const std::string & url,
759686
static const int max_attempts = 3;
760687
static const int retry_delay_seconds = 2;
761688

762-
auto [cli, parts] = http_client(url);
689+
auto [cli, parts] = common_http_client(url);
763690

764691
httplib::Headers default_headers = {{"User-Agent", "llama-cpp"}};
765692
if (!bearer_token.empty()) {
@@ -839,7 +766,7 @@ static bool common_download_file_single_online(const std::string & url,
839766

840767
// start the download
841768
LOG_INF("%s: trying to download model from %s to %s (etag:%s)...\n",
842-
__func__, show_masked_url(parts).c_str(), path_temporary.c_str(), etag.c_str());
769+
__func__, common_http_show_masked_url(parts).c_str(), path_temporary.c_str(), etag.c_str());
843770
const bool was_pull_successful = common_pull_file(cli, parts.path, path_temporary, supports_ranges, existing_size, total_size);
844771
if (!was_pull_successful) {
845772
if (i + 1 < max_attempts) {
@@ -867,7 +794,7 @@ static bool common_download_file_single_online(const std::string & url,
867794

868795
std::pair<long, std::vector<char>> common_remote_get_content(const std::string & url,
869796
const common_remote_params & params) {
870-
auto [cli, parts] = http_client(url);
797+
auto [cli, parts] = common_http_client(url);
871798

872799
httplib::Headers headers = {{"User-Agent", "llama-cpp"}};
873800
for (const auto & header : params.headers) {

common/http.h

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
#pragma once
2+
3+
#include <cpp-httplib/httplib.h>
4+
5+
struct common_http_url {
6+
std::string scheme;
7+
std::string user;
8+
std::string password;
9+
std::string host;
10+
std::string path;
11+
};
12+
13+
static common_http_url common_http_parse_url(const std::string & url) {
14+
common_http_url parts;
15+
auto scheme_end = url.find("://");
16+
17+
if (scheme_end == std::string::npos) {
18+
throw std::runtime_error("invalid URL: no scheme");
19+
}
20+
parts.scheme = url.substr(0, scheme_end);
21+
22+
if (parts.scheme != "http" && parts.scheme != "https") {
23+
throw std::runtime_error("unsupported URL scheme: " + parts.scheme);
24+
}
25+
26+
auto rest = url.substr(scheme_end + 3);
27+
auto at_pos = rest.find('@');
28+
29+
if (at_pos != std::string::npos) {
30+
auto auth = rest.substr(0, at_pos);
31+
auto colon_pos = auth.find(':');
32+
if (colon_pos != std::string::npos) {
33+
parts.user = auth.substr(0, colon_pos);
34+
parts.password = auth.substr(colon_pos + 1);
35+
} else {
36+
parts.user = auth;
37+
}
38+
rest = rest.substr(at_pos + 1);
39+
}
40+
41+
auto slash_pos = rest.find('/');
42+
43+
if (slash_pos != std::string::npos) {
44+
parts.host = rest.substr(0, slash_pos);
45+
parts.path = rest.substr(slash_pos);
46+
} else {
47+
parts.host = rest;
48+
parts.path = "/";
49+
}
50+
return parts;
51+
}
52+
53+
static std::pair<httplib::Client, common_http_url> common_http_client(const std::string & url) {
54+
common_http_url parts = common_http_parse_url(url);
55+
56+
if (parts.host.empty()) {
57+
throw std::runtime_error("error: invalid URL format");
58+
}
59+
60+
httplib::Client cli(parts.scheme + "://" + parts.host);
61+
62+
if (!parts.user.empty()) {
63+
cli.set_basic_auth(parts.user, parts.password);
64+
}
65+
66+
cli.set_follow_location(true);
67+
68+
return { std::move(cli), std::move(parts) };
69+
}
70+
71+
static std::string common_http_show_masked_url(const common_http_url & parts) {
72+
return parts.scheme + "://" + (parts.user.empty() ? "" : "****:****@") + parts.host + parts.path;
73+
}

0 commit comments

Comments
 (0)