Merged

Changes from all commits (49 commits)

c6d4cb4  llama : minor style (ggerganov, Sep 2, 2024)
9c1ba55  build(nix): Package gguf-py (#5664) (ditsuke, Sep 2, 2024)
b60074f  llama-cli : remove duplicated log message (#9275) (nbcsm, Sep 2, 2024)
6e7d133  server : refactor multitask handling (#9274) (ngxson, Sep 2, 2024)
f771d06  ggml : add pthread includes on FreeBSD (#9258) (yurivict, Sep 2, 2024)
048de84  docker : fix missing binaries in full-cuda image (#9278) (slaren, Sep 2, 2024)
f148516  src: make tail invalid when kv cell is intersection for mamba (#9249) (kylo5aby, Sep 2, 2024)
48baa61  server : test script : add timeout for all requests (#9282) (ngxson, Sep 2, 2024)
b69a480  readme : refactor API section + remove old hot topics (ggerganov, Sep 3, 2024)
8962422  llama-bench : add JSONL (NDJSON) output mode (#9288) (akx, Sep 3, 2024)
7605ae7  flake.lock: Update (#9261) (ggerganov, Sep 3, 2024)
9379d3c  readme : rename result_format to response_format (#9300) (iscy, Sep 4, 2024)
82e3b03  rpc : make RPC servers come first in the device list (#9296) (rgerganov, Sep 4, 2024)
c8671ae  Fix broken links in docker.md (#9306) (carlory, Sep 4, 2024)
5910ea9  [SYCL] Fix DMMV dequantization (#9279) (OuadiElfarouki, Sep 4, 2024)
581c305  ggml : AVX2 support for Q4_0_8_8 (#8713) (Srihari-mcw, Sep 4, 2024)
bdf314f  llama-bench : fix NUL terminators in CPU name (#9313) (slaren, Sep 5, 2024)
4db0478  cuda : fix defrag with quantized KV (#9319) (slaren, Sep 5, 2024)
1031771  CMake fix: host for msvc compiler can only be x86 or x64 (#8624) (Xarbirus, Sep 5, 2024)
32b2ec8  Update build.yml (#9184) (awatuna, Sep 5, 2024)
9bc6db2  ggml-quants : ternary packing for TriLMs and BitNet b1.58 (#8151) (compilade, Sep 6, 2024)
8ebe8dd  Improve Vulkan shader build system (#9239) (mtavenrath, Sep 6, 2024)
4a1411b  server : fix missing lock (#9334) (ngxson, Sep 6, 2024)
409dc4f  ggml : fix build break for the vulkan-debug (#9265) (cyzero-kim, Sep 6, 2024)
815b1fb  batched-bench : add `--output-format jsonl` option (#9293) (akx, Sep 6, 2024)
134bc38  llama-bench : log benchmark progress (#9287) (akx, Sep 6, 2024)
9b2c24c  server : simplify state machine for slot (#9283) (ngxson, Sep 6, 2024)
6c89eb0  ci : disable rocm image creation (#9340) (slaren, Sep 7, 2024)
947538a  ggml : fix missing `cpu_set_t` on emscripten (#9336) (ngxson, Sep 7, 2024)
df270ef  llama : refactor sampling v2 (#9294) (ggerganov, Sep 7, 2024)
e32d081  ggml : always check bounds on get_rows operations (#9354) (slaren, Sep 7, 2024)
1b9ae51  common : refactor arg parser (#9308) (ngxson, Sep 7, 2024)
e536426  llamafile : disable sgemm for batch-size 1 (#9330) (netrunnereve, Sep 7, 2024)
faf69d4  llama : sanitize invalid tokens (#9357) (ggerganov, Sep 7, 2024)
f12295b  llama : fix empty ring buffer push (#9358) (ggerganov, Sep 7, 2024)
a5b5d9a  llama.android : fix build (#9350) (ggerganov, Sep 7, 2024)
fbb7fcf  llama : set attrs of mislabelled EOT/EOM tokens (#9348) (bakkot, Sep 8, 2024)
efe6a83  ggml : fix cont with transposed tensors when one dimension is 1 (ggml… (smeso, Aug 28, 2024)
51d964a  cuda : mark BF16 CONT as unsupported (ggerganov, Aug 28, 2024)
d2d3200  cann : add Ascend NPU support (whisper/2336) (MengqingCao, Aug 9, 2024)
ba1cf84  cann : fix doxy (ggml/0) (ggerganov, Aug 28, 2024)
dbbebca  ggml: fix ggml_graph_cpy undefined behavior (ggml/943) (JohannesGaessler, Aug 31, 2024)
202084d  tests: add gradient tests for all backends (ggml/932) (JohannesGaessler, Sep 3, 2024)
9cb9260  vulkan: correctly report support for OP_CONT (ggml/946) (smeso, Sep 6, 2024)
406c1a3  vulkan: add dryrun support to sin and cos ops (ggml/947) (smeso, Sep 6, 2024)
60a3107  scripts : option to increase git patch context (ggerganov, Sep 8, 2024)
385decb  sync : ggml (ggerganov, Sep 8, 2024)
a876861  metal : update support condition for im2col + fix warning (#0) (ggerganov, Sep 8, 2024)
d11bd3b  Merge branch 'master' of https://github.com/l3utterfly/llama.cpp (l3utterfly, Sep 8, 2024)

.devops/full-cuda.Dockerfile (2 changes: 1 addition & 1 deletion)

@@ -27,7 +27,7 @@ RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \
         export CMAKE_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH}"; \
     fi && \
     cmake -B build -DGGML_CUDA=ON -DLLAMA_CURL=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
-    cmake --build build --config Release --target llama-cli -j$(nproc) && \
+    cmake --build build --config Release -j$(nproc) && \
     cp build/bin/* .
 
 ENTRYPOINT ["/app/.devops/tools.sh"]
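
This change builds all targets instead of only llama-cli, so the full-cuda image ships the complete set of binaries again (#9278). A smoke-test sketch follows; the image tag, model path, and prompt are placeholders, and it assumes the usual tools.sh entrypoint modes (such as --run):

    # build the image from the repository root, then run inference through
    # the tools.sh entrypoint to confirm the binaries are present
    docker build -t local/llama.cpp:full-cuda -f .devops/full-cuda.Dockerfile .
    docker run --gpus all -v /path/to/models:/models local/llama.cpp:full-cuda \
        --run -m /models/model.gguf -p "Hello" -n 64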

.devops/nix/devshells.nix (53 changes: 46 additions & 7 deletions)

@@ -1,13 +1,52 @@
+{ inputs, ... }:
+
 {
   perSystem =
-    { config, lib, ... }:
+    {
+      config,
+      lib,
+      system,
+      ...
+    }:
     {
       devShells =
-        lib.concatMapAttrs
-          (name: package: {
-            ${name} = package.passthru.shell;
-            ${name + "-extra"} = package.passthru.shell-extra;
-          })
-          config.packages;
+        let
+          pkgs = import inputs.nixpkgs { inherit system; };
+          stdenv = pkgs.stdenv;
+          scripts = config.packages.python-scripts;
+        in
+        lib.pipe (config.packages) [
+          (lib.concatMapAttrs (
+            name: package: {
+              ${name} = pkgs.mkShell {
+                name = "${name}";
+                inputsFrom = [ package ];
+                shellHook = ''
+                  echo "Entering ${name} devShell"
+                '';
+              };
+              "${name}-extra" =
+                if (name == "python-scripts") then
+                  null
+                else
+                  pkgs.mkShell {
+                    name = "${name}-extra";
+                    inputsFrom = [
+                      package
+                      scripts
+                    ];
+                    # Extra packages that *may* be used by some scripts
+                    packages = [
+                      pkgs.python3Packages.tiktoken
+                    ];
+                    shellHook = ''
+                      echo "Entering ${name} devShell"
+                      addToSearchPath "LD_LIBRARY_PATH" "${lib.getLib stdenv.cc.cc}/lib"
+                    '';
+                  };
+            }
+          ))
+          (lib.filterAttrs (name: value: value != null))
+        ];
     };
 }
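
The rewritten devshells.nix derives one shell per flake package plus a "-extra" variant that layers in the python-scripts dependencies and tiktoken; the null entry for python-scripts-extra is filtered out at the end. A usage sketch, assuming the flake exposes a package named `default` (the attribute name is an assumption, not shown in this diff):

    # enter the extended shell for the default package
    nix develop .#default-extra
    # the shellHook echoes "Entering default devShell" and adds the C++
    # runtime libraries from stdenv to LD_LIBRARY_PATH for the scripts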

.devops/nix/nixpkgs-instances.nix (18 changes: 8 additions & 10 deletions)

@@ -26,16 +26,14 @@
     config.cudaSupport = true;
     config.allowUnfreePredicate =
       p:
-      builtins.all
-        (
-          license:
-          license.free
-          || builtins.elem license.shortName [
-            "CUDA EULA"
-            "cuDNN EULA"
-          ]
-        )
-        (p.meta.licenses or [ p.meta.license ]);
+      builtins.all (
+        license:
+        license.free
+        || builtins.elem license.shortName [
+          "CUDA EULA"
+          "cuDNN EULA"
+        ]
+      ) (p.meta.licenses or [ p.meta.license ]);
   };
   # Ensure dependencies use ROCm consistently
   pkgsRocm = import inputs.nixpkgs {

.devops/nix/package-gguf-py.nix (36 changes: 36 additions & 0 deletions, new file)

@@ -0,0 +1,36 @@
+{
+  lib,
+  llamaVersion,
+  numpy,
+  tqdm,
+  sentencepiece,
+  pyyaml,
+  poetry-core,
+  buildPythonPackage,
+  pytestCheckHook,
+}:
+
+buildPythonPackage {
+  pname = "gguf";
+  version = llamaVersion;
+  pyproject = true;
+  nativeBuildInputs = [ poetry-core ];
+  propagatedBuildInputs = [
+    numpy
+    tqdm
+    sentencepiece
+    pyyaml
+  ];
+  src = lib.cleanSource ../../gguf-py;
+  pythonImportsCheck = [
+    "numpy"
+    "gguf"
+  ];
+  nativeCheckInputs = [ pytestCheckHook ];
+  doCheck = true;
+  meta = with lib; {
+    description = "Python package for writing binary files in the GGUF format";
+    license = licenses.mit;
+    maintainers = [ maintainers.ditsuke ];
+  };
+}
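
Because doCheck = true, building this derivation runs the gguf-py pytest suite and the pythonImportsCheck imports before it can succeed. A hypothetical build command, assuming the flake wires the derivation up under an attribute named `gguf-py` (the attribute name is not confirmed by this diff):

    # build the package; the checks run as part of the build
    nix build .#gguf-py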