diff --git a/.github/ISSUE_TEMPLATE/010-bug-compilation.yml b/.github/ISSUE_TEMPLATE/010-bug-compilation.yml deleted file mode 100644 index feb0d512055a6..0000000000000 --- a/.github/ISSUE_TEMPLATE/010-bug-compilation.yml +++ /dev/null @@ -1,87 +0,0 @@ -name: Bug (compilation) -description: Something goes wrong when trying to compile llama.cpp. -title: "Compile bug: " -labels: ["bug-unconfirmed", "compilation"] -body: - - type: markdown - attributes: - value: > - Thanks for taking the time to fill out this bug report! - This issue template is intended for bug reports where the compilation of llama.cpp fails. - Before opening an issue, please confirm that the compilation still fails with `-DGGML_CCACHE=OFF`. - If the compilation succeeds with ccache disabled you should be able to permanently fix the issue - by clearing `~/.cache/ccache` (on Linux). - - type: textarea - id: commit - attributes: - label: Git commit - description: Which commit are you trying to compile? - placeholder: | - $git rev-parse HEAD - 84a07a17b1b08cf2b9747c633a2372782848a27f - validations: - required: true - - type: dropdown - id: operating-system - attributes: - label: Operating systems - description: Which operating systems do you know to be affected? - multiple: true - options: - - Linux - - Mac - - Windows - - BSD - - Other? (Please let us know in description) - validations: - required: true - - type: dropdown - id: backends - attributes: - label: GGML backends - description: Which GGML backends do you know to be affected? - options: [AMX, BLAS, CPU, CUDA, HIP, Metal, Musa, RPC, SYCL, Vulkan, OpenCL, zDNN] - multiple: true - validations: - required: true - - type: textarea - id: info - attributes: - label: Problem description & steps to reproduce - description: > - Please give us a summary of the problem and tell us how to reproduce it. - If you can narrow down the bug to specific compile flags, that information would be very much appreciated by us. 
- placeholder: > - I'm trying to compile llama.cpp with CUDA support on a fresh install of Ubuntu and get error XY. - Here are the exact commands that I used: ... - validations: - required: true - - type: textarea - id: first_bad_commit - attributes: - label: First Bad Commit - description: > - If the bug was not present on an earlier version: when did it start appearing? - If possible, please do a git bisect and identify the exact commit that introduced the bug. - validations: - required: false - - type: textarea - id: command - attributes: - label: Compile command - description: > - Please provide the exact command you used to compile llama.cpp. For example: `cmake -B ...`. - This will be automatically formatted into code, so no need for backticks. - render: shell - validations: - required: true - - type: textarea - id: logs - attributes: - label: Relevant log output - description: > - Please copy and paste any relevant log output, including any generated text. - This will be automatically formatted into code, so no need for backticks. - render: shell - validations: - required: true diff --git a/.github/ISSUE_TEMPLATE/011-bug-results.yml b/.github/ISSUE_TEMPLATE/011-bug-results.yml deleted file mode 100644 index c42a14ff83eb6..0000000000000 --- a/.github/ISSUE_TEMPLATE/011-bug-results.yml +++ /dev/null @@ -1,101 +0,0 @@ -name: Bug (model use) -description: Something goes wrong when using a model (in general, not specific to a single llama.cpp module). -title: "Eval bug: " -labels: ["bug-unconfirmed", "model evaluation"] -body: - - type: markdown - attributes: - value: > - Thanks for taking the time to fill out this bug report! - This issue template is intended for bug reports where the model evaluation results - (i.e. the generated text) are incorrect or llama.cpp crashes during model evaluation. - If you encountered the issue while using an external UI (e.g. ollama), - please reproduce your issue using one of the examples/binaries in this repository. 
- The `llama-cli` binary can be used for simple and reproducible model inference. - - type: textarea - id: version - attributes: - label: Name and Version - description: Which version of our software are you running? (use `--version` to get a version string) - placeholder: | - $./llama-cli --version - version: 2999 (42b4109e) - built with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu - validations: - required: true - - type: dropdown - id: operating-system - attributes: - label: Operating systems - description: Which operating systems do you know to be affected? - multiple: true - options: - - Linux - - Mac - - Windows - - BSD - - Other? (Please let us know in description) - validations: - required: true - - type: dropdown - id: backends - attributes: - label: GGML backends - description: Which GGML backends do you know to be affected? - options: [AMX, BLAS, CPU, CUDA, HIP, Metal, Musa, RPC, SYCL, Vulkan, OpenCL, zDNN] - multiple: true - validations: - required: true - - type: textarea - id: hardware - attributes: - label: Hardware - description: Which CPUs/GPUs are you using? - placeholder: > - e.g. Ryzen 5950X + 2x RTX 4090 - validations: - required: true - - type: textarea - id: model - attributes: - label: Models - description: > - Which model(s) at which quantization were you using when encountering the bug? - If you downloaded a GGUF file off of Huggingface, please provide a link. - placeholder: > - e.g. Meta LLaMA 3.1 Instruct 8b q4_K_M - validations: - required: false - - type: textarea - id: info - attributes: - label: Problem description & steps to reproduce - description: > - Please give us a summary of the problem and tell us how to reproduce it. - If you can narrow down the bug to specific hardware, compile flags, or command line arguments, - that information would be very much appreciated by us. - placeholder: > - e.g. when I run llama-cli with -ngl 99 I get garbled outputs. - When I use -ngl 0 it works correctly. 
- Here are the exact commands that I used: ... - validations: - required: true - - type: textarea - id: first_bad_commit - attributes: - label: First Bad Commit - description: > - If the bug was not present on an earlier version: when did it start appearing? - If possible, please do a git bisect and identify the exact commit that introduced the bug. - validations: - required: false - - type: textarea - id: logs - attributes: - label: Relevant log output - description: > - Please copy and paste any relevant log output, including the command that you entered and any generated text. - This will be automatically formatted into code, so no need for backticks. - render: shell - validations: - required: true diff --git a/.github/ISSUE_TEMPLATE/019-bug-misc.yml b/.github/ISSUE_TEMPLATE/019-bug-misc.yml deleted file mode 100644 index 1904e31fdc436..0000000000000 --- a/.github/ISSUE_TEMPLATE/019-bug-misc.yml +++ /dev/null @@ -1,91 +0,0 @@ -name: Bug (misc.) -description: Something is not working the way it should (and it's not covered by any of the above cases). -title: "Misc. bug: " -labels: ["bug-unconfirmed"] -body: - - type: markdown - attributes: - value: > - Thanks for taking the time to fill out this bug report! - This issue template is intended for miscellaneous bugs that don't fit into any other category. - If you encountered the issue while using an external UI (e.g. ollama), - please reproduce your issue using one of the examples/binaries in this repository. - - type: textarea - id: version - attributes: - label: Name and Version - description: Which version of our software is affected? (You can use `--version` to get a version string.) - placeholder: | - $./llama-cli --version - version: 2999 (42b4109e) - built with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu - validations: - required: true - - type: dropdown - id: operating-system - attributes: - label: Operating systems - description: Which operating systems do you know to be affected? 
- multiple: true - options: - - Linux - - Mac - - Windows - - BSD - - Other? (Please let us know in description) - validations: - required: false - - type: dropdown - id: module - attributes: - label: Which llama.cpp modules do you know to be affected? - multiple: true - options: - - Documentation/Github - - libllama (core library) - - llama-cli - - llama-server - - llama-bench - - llama-quantize - - Python/Bash scripts - - Test code - - Other (Please specify in the next section) - validations: - required: false - - type: textarea - id: command - attributes: - label: Command line - description: > - Please provide the exact commands you entered, if applicable. For example: `llama-server -m ... -c ...`, `llama-cli -m ...`, etc. - This will be automatically formatted into code, so no need for backticks. - render: shell - validations: - required: false - - type: textarea - id: info - attributes: - label: Problem description & steps to reproduce - description: > - Please give us a summary of the problem and tell us how to reproduce it (if applicable). - validations: - required: true - - type: textarea - id: first_bad_commit - attributes: - label: First Bad Commit - description: > - If the bug was not present on an earlier version and it's not trivial to track down: when did it start appearing? - If possible, please do a git bisect and identify the exact commit that introduced the bug. - validations: - required: false - - type: textarea - id: logs - attributes: - label: Relevant log output - description: > - If applicable, please copy and paste any relevant log output, including any generated text. - This will be automatically formatted into code, so no need for backticks. 
- render: shell - validations: - required: false diff --git a/.github/ISSUE_TEMPLATE/020-enhancement.yml b/.github/ISSUE_TEMPLATE/020-enhancement.yml deleted file mode 100644 index cee1446f5a097..0000000000000 --- a/.github/ISSUE_TEMPLATE/020-enhancement.yml +++ /dev/null @@ -1,51 +0,0 @@ -name: Enhancement -description: Used to request enhancements for llama.cpp. -title: "Feature Request: " -labels: ["enhancement"] -body: - - type: markdown - attributes: - value: | - [Please post your idea first in Discussion if there is not yet a consensus for this enhancement request. This will help to keep this issue tracker focused on enhancements that the community has agreed needs to be implemented.](https://github.com/ggml-org/llama.cpp/discussions/categories/ideas) - - - type: checkboxes - id: prerequisites - attributes: - label: Prerequisites - description: Please confirm the following before submitting your enhancement request. - options: - - label: I am running the latest code. Mention the version if possible as well. - required: true - - label: I carefully followed the [README.md](https://github.com/ggml-org/llama.cpp/blob/master/README.md). - required: true - - label: I searched using keywords relevant to my issue to make sure that I am creating a new issue that is not already open (or closed). - required: true - - label: I reviewed the [Discussions](https://github.com/ggml-org/llama.cpp/discussions), and have a new and useful enhancement to share. - required: true - - - type: textarea - id: feature-description - attributes: - label: Feature Description - description: Please provide a detailed written description of what you were trying to do, and what you expected `llama.cpp` to do as an enhancement. 
- placeholder: Detailed description of the enhancement - validations: - required: true - - - type: textarea - id: motivation - attributes: - label: Motivation - description: Please provide a detailed written description of reasons why this feature is necessary and how it is useful to `llama.cpp` users. - placeholder: Explanation of why this feature is needed and its benefits - validations: - required: true - - - type: textarea - id: possible-implementation - attributes: - label: Possible Implementation - description: If you have an idea as to how it can be implemented, please write a detailed description. Feel free to give links to external sources or share visuals that might be helpful to understand the details better. - placeholder: Detailed description of potential implementation - validations: - required: false diff --git a/.github/ISSUE_TEMPLATE/030-research.yml b/.github/ISSUE_TEMPLATE/030-research.yml deleted file mode 100644 index e774550d5908c..0000000000000 --- a/.github/ISSUE_TEMPLATE/030-research.yml +++ /dev/null @@ -1,52 +0,0 @@ -name: Research -description: Track new technical research area. -title: "Research: " -labels: ["research 🔬"] -body: - - type: markdown - attributes: - value: | - Don't forget to check for any [duplicate research issue tickets](https://github.com/ggml-org/llama.cpp/issues?q=is%3Aopen+is%3Aissue+label%3A%22research+%F0%9F%94%AC%22) - - - type: checkboxes - id: research-stage - attributes: - label: Research Stage - description: Track general state of this research ticket - options: - - label: Background Research (Let's try to avoid reinventing the wheel) - - label: Hypothesis Formed (How do you think this will work and it's effect?) 
- - label: Strategy / Implementation Forming - - label: Analysis of results - - label: Debrief / Documentation (So people in the future can learn from us) - - - type: textarea - id: background - attributes: - label: Previous existing literature and research - description: Whats the current state of the art and whats the motivation for this research? - - - type: textarea - id: hypothesis - attributes: - label: Hypothesis - description: How do you think this will work and it's effect? - - - type: textarea - id: implementation - attributes: - label: Implementation - description: Got an approach? e.g. a PR ready to go? - - - type: textarea - id: analysis - attributes: - label: Analysis - description: How does the proposed implementation behave? - - - type: textarea - id: logs - attributes: - label: Relevant log output - description: Please copy and paste any relevant log output. This will be automatically formatted into code, so no need for backticks. - render: shell diff --git a/.github/ISSUE_TEMPLATE/040-refactor.yml b/.github/ISSUE_TEMPLATE/040-refactor.yml deleted file mode 100644 index 2fe94e26c6988..0000000000000 --- a/.github/ISSUE_TEMPLATE/040-refactor.yml +++ /dev/null @@ -1,28 +0,0 @@ -name: Refactor (Maintainers) -description: Used to track refactoring opportunities. -title: "Refactor: " -labels: ["refactor"] -body: - - type: markdown - attributes: - value: | - Don't forget to [check for existing refactor issue tickets](https://github.com/ggml-org/llama.cpp/issues?q=is%3Aopen+is%3Aissue+label%3Arefactoring) in case it's already covered. - Also you may want to check [Pull request refactor label as well](https://github.com/ggml-org/llama.cpp/pulls?q=is%3Aopen+is%3Apr+label%3Arefactoring) for duplicates too. - - - type: textarea - id: background-description - attributes: - label: Background Description - description: Please provide a detailed written description of the pain points you are trying to solve. 
- placeholder: Detailed description behind your motivation to request refactor - validations: - required: true - - - type: textarea - id: possible-approaches - attributes: - label: Possible Refactor Approaches - description: If you have some idea of possible approaches to solve this problem. You may want to make it a todo list. - placeholder: Your idea of possible refactoring opportunity/approaches - validations: - required: false diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml deleted file mode 100644 index 0d246533c9515..0000000000000 --- a/.github/ISSUE_TEMPLATE/config.yml +++ /dev/null @@ -1,11 +0,0 @@ -blank_issues_enabled: true -contact_links: - - name: Got an idea? - url: https://github.com/ggml-org/llama.cpp/discussions/categories/ideas - about: Pop it there. It may then become an enhancement ticket. - - name: Got a question? - url: https://github.com/ggml-org/llama.cpp/discussions/categories/q-a - about: Ask a question there! - - name: Want to contribute? 
- url: https://github.com/ggml-org/llama.cpp/wiki/contribute - about: Head to the contribution guide page of the wiki for areas you can help with diff --git a/.github/actions/get-tag-name/action.yml b/.github/actions/get-tag-name/action.yml deleted file mode 100644 index 7ace23b2a3e76..0000000000000 --- a/.github/actions/get-tag-name/action.yml +++ /dev/null @@ -1,22 +0,0 @@ -name: "Determine tag name" -description: "Determine the tag name to use for a release" -outputs: - name: - description: "The name of the tag" - value: ${{ steps.tag.outputs.name }} - -runs: - using: "composite" - steps: - - name: Determine tag name - id: tag - shell: bash - run: | - BUILD_NUMBER="$(git rev-list --count HEAD)" - SHORT_HASH="$(git rev-parse --short=7 HEAD)" - if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then - echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT - else - SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-') - echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT - fi diff --git a/.github/actions/windows-setup-cuda/action.yml b/.github/actions/windows-setup-cuda/action.yml deleted file mode 100644 index 5575caeca31a2..0000000000000 --- a/.github/actions/windows-setup-cuda/action.yml +++ /dev/null @@ -1,67 +0,0 @@ -name: "Windows - Setup CUDA Toolkit" -description: "Setup CUDA Toolkit for Windows" -inputs: - cuda_version: - description: "CUDA toolkit version" - required: true - -runs: - using: "composite" - steps: - - name: Install Cuda Toolkit 11.7 - if: ${{ inputs.cuda_version == '11.7' }} - shell: pwsh - run: | - mkdir -p "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" - choco install unzip -y - curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_cudart/windows-x86_64/cuda_cudart-windows-x86_64-11.7.99-archive.zip" - curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvcc/windows-x86_64/cuda_nvcc-windows-x86_64-11.7.99-archive.zip" - curl -O 
"https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvrtc/windows-x86_64/cuda_nvrtc-windows-x86_64-11.7.99-archive.zip" - curl -O "https://developer.download.nvidia.com/compute/cuda/redist/libcublas/windows-x86_64/libcublas-windows-x86_64-11.7.4.6-archive.zip" - curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvtx/windows-x86_64/cuda_nvtx-windows-x86_64-11.7.91-archive.zip" - curl -O "https://developer.download.nvidia.com/compute/cuda/redist/visual_studio_integration/windows-x86_64/visual_studio_integration-windows-x86_64-11.7.91-archive.zip" - curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvprof/windows-x86_64/cuda_nvprof-windows-x86_64-11.7.101-archive.zip" - curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_cccl/windows-x86_64/cuda_cccl-windows-x86_64-11.7.91-archive.zip" - unzip '*.zip' -d "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" - xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_cudart-windows-x86_64-11.7.99-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y - xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_nvcc-windows-x86_64-11.7.99-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y - xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_nvrtc-windows-x86_64-11.7.99-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y - xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\libcublas-windows-x86_64-11.7.4.6-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y - xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_nvtx-windows-x86_64-11.7.91-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y - xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\visual_studio_integration-windows-x86_64-11.7.91-archive\*" "C:\Program 
Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y - xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_nvprof-windows-x86_64-11.7.101-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y - xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_cccl-windows-x86_64-11.7.91-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y - echo "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append - echo "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\libnvvp" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append - echo "CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8 - echo "CUDA_PATH_V11_7=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8 - - - name: Install Cuda Toolkit 12.4 - if: ${{ inputs.cuda_version == '12.4' }} - shell: pwsh - run: | - mkdir -p "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" - choco install unzip -y - curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_cudart/windows-x86_64/cuda_cudart-windows-x86_64-12.4.127-archive.zip" - curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvcc/windows-x86_64/cuda_nvcc-windows-x86_64-12.4.131-archive.zip" - curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvrtc/windows-x86_64/cuda_nvrtc-windows-x86_64-12.4.127-archive.zip" - curl -O "https://developer.download.nvidia.com/compute/cuda/redist/libcublas/windows-x86_64/libcublas-windows-x86_64-12.4.5.8-archive.zip" - curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvtx/windows-x86_64/cuda_nvtx-windows-x86_64-12.4.127-archive.zip" - curl -O 
"https://developer.download.nvidia.com/compute/cuda/redist/cuda_profiler_api/windows-x86_64/cuda_profiler_api-windows-x86_64-12.4.127-archive.zip" - curl -O "https://developer.download.nvidia.com/compute/cuda/redist/visual_studio_integration/windows-x86_64/visual_studio_integration-windows-x86_64-12.4.127-archive.zip" - curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvprof/windows-x86_64/cuda_nvprof-windows-x86_64-12.4.127-archive.zip" - curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_cccl/windows-x86_64/cuda_cccl-windows-x86_64-12.4.127-archive.zip" - unzip '*.zip' -d "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" - xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_cudart-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y - xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_nvcc-windows-x86_64-12.4.131-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y - xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_nvrtc-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y - xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\libcublas-windows-x86_64-12.4.5.8-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y - xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_nvtx-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y - xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_profiler_api-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y - xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\visual_studio_integration-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y - xcopy 
"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_nvprof-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y - xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_cccl-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y - echo "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append - echo "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\libnvvp" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append - echo "CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8 - echo "CUDA_PATH_V12_4=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8 diff --git a/.github/actions/windows-setup-curl/action.yml b/.github/actions/windows-setup-curl/action.yml deleted file mode 100644 index 446f799fac34a..0000000000000 --- a/.github/actions/windows-setup-curl/action.yml +++ /dev/null @@ -1,30 +0,0 @@ -name: 'Windows - Setup CURL' -description: 'Composite action, to be reused in other workflow' -inputs: - curl_version: - description: 'CURL version' - required: false - default: '8.6.0_6' - architecture: - description: 'Architecture of the libcurl to download' - required: false - default: 'win64' -outputs: - curl_path: - description: "Path to the downloaded libcurl" - value: ${{ steps.get_libcurl.outputs.curl_path }} - -runs: - using: "composite" - steps: - - name: libCURL - id: get_libcurl - shell: powershell - env: - CURL_VERSION: ${{ inputs.curl_version }} - ARCHITECTURE: ${{ inputs.architecture }} - run: | - curl.exe -o $env:RUNNER_TEMP/curl.zip -L "https://curl.se/windows/dl-${env:CURL_VERSION}/curl-${env:CURL_VERSION}-${env:ARCHITECTURE}-mingw.zip" - mkdir $env:RUNNER_TEMP/libcurl - tar.exe -xvf $env:RUNNER_TEMP/curl.zip 
--strip-components=1 -C $env:RUNNER_TEMP/libcurl - echo "curl_path=$env:RUNNER_TEMP/libcurl" >> $env:GITHUB_OUTPUT diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md deleted file mode 100644 index 3250e3279ecb6..0000000000000 --- a/.github/copilot-instructions.md +++ /dev/null @@ -1,262 +0,0 @@ -# Copilot Instructions for llama.cpp - -## Repository Overview - -llama.cpp is a large-scale C/C++ project for efficient LLM (Large Language Model) inference with minimal setup and dependencies. The project enables running language models on diverse hardware with state-of-the-art performance. - -**Key Facts:** -- **Primary language**: C/C++ with Python utility scripts -- **Size**: ~200k+ lines of code across 1000+ files -- **Architecture**: Modular design with main library (`libllama`) and 40+ executable tools/examples -- **Core dependency**: ggml tensor library (vendored in `ggml/` directory) -- **Backends supported**: CPU (AVX/NEON optimized), CUDA, Metal, Vulkan, SYCL, ROCm, MUSA -- **License**: MIT - -## Build Instructions - -### Prerequisites -- CMake 3.14+ (primary build system) -- C++17 compatible compiler (GCC 13.3+, Clang, MSVC) -- Optional: ccache for faster compilation - -### Basic Build (CPU-only) -**ALWAYS run these commands in sequence:** -```bash -cmake -B build -cmake --build build --config Release -j $(nproc) -``` - -**Build time**: ~10 minutes on 4-core system with ccache enabled, ~25 minutes without ccache. 
- -**Important Notes:** -- The Makefile is deprecated - always use CMake -- ccache is automatically detected and used if available -- Built binaries are placed in `build/bin/` -- Parallel builds (`-j`) significantly reduce build time - -### Backend-Specific Builds -For CUDA support: -```bash -cmake -B build -DGGML_CUDA=ON -cmake --build build --config Release -j $(nproc) -``` - -For Metal (macOS): -```bash -cmake -B build -DGGML_METAL=ON -cmake --build build --config Release -j $(nproc) -``` - -**Important Note**: While all backends can be built as long as the correct requirements for that backend are installed, you will not be able to run them without the correct hardware. The only backend that can be run for testing and validation is the CPU backend. - -### Debug Builds -Single-config generators: -```bash -cmake -B build -DCMAKE_BUILD_TYPE=Debug -cmake --build build -``` - -Multi-config generators: -```bash -cmake -B build -G "Xcode" -cmake --build build --config Debug -``` - -### Common Build Issues -- **Issue**: Network tests fail in isolated environments - **Solution**: Expected behavior - core functionality tests will still pass - -## Testing - -### Running Tests -```bash -ctest --test-dir build --output-on-failure -j $(nproc) -``` - -**Test suite**: 38 tests covering tokenizers, grammar parsing, sampling, backends, and integration -**Expected failures**: 2-3 tests may fail if network access is unavailable (they download models) -**Test time**: ~30 seconds for passing tests - -### Server Unit Tests -Run server-specific unit tests after building the server: -```bash -# Build the server first -cmake --build build --target llama-server - -# Navigate to server tests and run -cd tools/server/tests -source ../../../.venv/bin/activate -./tests.sh -``` -**Server test dependencies**: The `.venv` environment includes the required dependencies for server unit tests (pytest, aiohttp, etc.). 
Tests can be run individually or with various options as documented in `tools/server/tests/README.md`. - -### Test Categories -- Tokenizer tests: Various model tokenizers (BERT, GPT-2, LLaMA, etc.) -- Grammar tests: GBNF parsing and validation -- Backend tests: Core ggml operations across different backends -- Integration tests: End-to-end workflows - -### Manual Testing Commands -```bash -# Test basic inference -./build/bin/llama-cli --version - -# Test model loading (requires model file) -./build/bin/llama-cli -m path/to/model.gguf -p "Hello" -n 10 -``` - -## Code Quality and Linting - -### C++ Code Formatting -**ALWAYS format C++ code before committing:** -```bash -git clang-format -``` - -Configuration is in `.clang-format` with these key rules: -- 4-space indentation -- 120 column limit -- Braces on same line for functions -- Pointer alignment: `void * ptr` (middle) -- Reference alignment: `int & ref` (middle) - -### Python Code -**ALWAYS activate the Python environment in `.venv` and use tools from that environment:** -```bash -# Activate virtual environment -source .venv/bin/activate -``` - -Configuration files: -- `.flake8`: flake8 settings (max-line-length=125, excludes examples/tools) -- `pyrightconfig.json`: pyright type checking configuration - -### Pre-commit Hooks -Run before committing: -```bash -pre-commit run --all-files -``` - -## Continuous Integration - -### GitHub Actions Workflows -Key workflows that run on every PR: -- `.github/workflows/build.yml`: Multi-platform builds -- `.github/workflows/server.yml`: Server functionality tests -- `.github/workflows/python-lint.yml`: Python code quality -- `.github/workflows/python-type-check.yml`: Python type checking - -### Local CI Validation -**Run full CI locally before submitting PRs:** -```bash -mkdir tmp - -# CPU-only build -bash ./ci/run.sh ./tmp/results ./tmp/mnt -``` - -**CI Runtime**: 30-60 minutes depending on backend configuration - -### Triggering CI -Add `ggml-ci` to commit message to 
trigger heavy CI workloads on the custom CI infrastructure. - -## Project Layout and Architecture - -### Core Directories -- **`src/`**: Main llama library implementation (`llama.cpp`, `llama-*.cpp`) -- **`include/`**: Public API headers, primarily `include/llama.h` -- **`ggml/`**: Core tensor library (submodule with custom GGML framework) -- **`examples/`**: 30+ example applications and tools -- **`tools/`**: Additional development and utility tools (server benchmarks, tests) -- **`tests/`**: Comprehensive test suite with CTest integration -- **`docs/`**: Detailed documentation (build guides, API docs, etc.) -- **`scripts/`**: Utility scripts for CI, data processing, and automation -- **`common/`**: Shared utility code used across examples - -### Key Files -- **`CMakeLists.txt`**: Primary build configuration -- **`include/llama.h`**: Main C API header (~2000 lines) -- **`src/llama.cpp`**: Core library implementation (~8000 lines) -- **`CONTRIBUTING.md`**: Coding guidelines and PR requirements -- **`.clang-format`**: C++ formatting rules -- **`.pre-commit-config.yaml`**: Git hook configuration - -### Built Executables (in `build/bin/`) -Primary tools: -- **`llama-cli`**: Main inference tool -- **`llama-server`**: OpenAI-compatible HTTP server -- **`llama-quantize`**: Model quantization utility -- **`llama-perplexity`**: Model evaluation tool -- **`llama-bench`**: Performance benchmarking -- **`llama-convert-llama2c-to-ggml`**: Model conversion utilities - -### Configuration Files -- **CMake**: `CMakeLists.txt`, `cmake/` directory -- **Linting**: `.clang-format`, `.clang-tidy`, `.flake8` -- **CI**: `.github/workflows/`, `ci/run.sh` -- **Git**: `.gitignore` (includes build artifacts, models, cache) - -### Dependencies -- **System**: OpenMP, libcurl (for model downloading) -- **Optional**: CUDA SDK, Metal framework, Vulkan SDK, Intel oneAPI -- **Bundled**: httplib, json (header-only libraries in vendored form) - -## Common Validation Steps - -### After Making Changes 
-1. **Format code**: `git clang-format` -2. **Build**: `cmake --build build --config Release` -3. **Test**: `ctest --test-dir build --output-on-failure` -4. **Server tests** (if modifying server): `cd tools/server/tests && source ../../../.venv/bin/activate && ./tests.sh` -5. **Manual validation**: Test relevant tools in `build/bin/` - -### Performance Validation -```bash -# Benchmark inference performance -./build/bin/llama-bench -m model.gguf - -# Evaluate model perplexity -./build/bin/llama-perplexity -m model.gguf -f dataset.txt -``` - -### Backend Validation -```bash -# Test backend operations -./build/bin/test-backend-ops -``` - -## Environment Setup - -### Required Tools -- CMake 3.14+ (install via system package manager) -- Modern C++ compiler with C++17 support -- Git (for submodule management) -- Python 3.9+ with virtual environment (`.venv` is provided) - -### Optional but Recommended -- ccache: `apt install ccache` or `brew install ccache` -- clang-format 15+: Usually included with LLVM/Clang installation -- pre-commit: `pip install pre-commit` - -### Backend-Specific Requirements -- **CUDA**: NVIDIA CUDA Toolkit 11.2+ -- **Metal**: Xcode command line tools (macOS only) -- **Vulkan**: Vulkan SDK -- **SYCL**: Intel oneAPI toolkit - -## Important Guidelines - -### Code Changes -- **Minimal dependencies**: Avoid adding new external dependencies -- **Cross-platform compatibility**: Test on Linux, macOS, Windows when possible -- **Performance focus**: This is a performance-critical inference library -- **API stability**: Changes to `include/llama.h` require careful consideration - -### Git Workflow -- Always create feature branches from `master` -- **Never** commit build artifacts (`build/`, `.ccache/`, `*.o`, `*.gguf`) -- Use descriptive commit messages following project conventions - -### Trust These Instructions -Only search for additional information if these instructions are incomplete or found to be incorrect. 
This document contains validated build and test procedures that work reliably across different environments. - diff --git a/.github/labeler.yml b/.github/labeler.yml deleted file mode 100644 index c4da4ab4e1fd2..0000000000000 --- a/.github/labeler.yml +++ /dev/null @@ -1,99 +0,0 @@ -# https://github.com/actions/labeler -Apple Metal: - - changed-files: - - any-glob-to-any-file: - - ggml/include/ggml-metal.h - - ggml/src/ggml-metal/** - - README-metal.md -SYCL: - - changed-files: - - any-glob-to-any-file: - - ggml/include/ggml-sycl.h - - ggml/src/ggml-sycl/** - - docs/backend/SYCL.md - - examples/sycl/** -Nvidia GPU: - - changed-files: - - any-glob-to-any-file: - - ggml/include/ggml-cuda.h - - ggml/src/ggml-cuda/** -Vulkan: - - changed-files: - - any-glob-to-any-file: - - ggml/include/ggml-vulkan.h - - ggml/src/ggml-vulkan/** -IBM zDNN: - - changed-files: - - any-glob-to-any-file: - - ggml/include/ggml-zdnn.h - - ggml/src/ggml-zdnn/** -documentation: - - changed-files: - - any-glob-to-any-file: - - docs/** - - media/** -testing: - - changed-files: - - any-glob-to-any-file: - - tests/** -build: - - changed-files: - - any-glob-to-any-file: - - cmake/** - - CMakeLists.txt - - CMakePresets.json -examples: - - changed-files: - - any-glob-to-any-file: - - examples/** - - tools/** -devops: - - changed-files: - - any-glob-to-any-file: - - .devops/** - - .github/** - - ci/** -python: - - changed-files: - - any-glob-to-any-file: - - "**/*.py" - - requirements/** - - gguf-py/** - - .flake8 -script: - - changed-files: - - any-glob-to-any-file: - - scripts/** -android: - - changed-files: - - any-glob-to-any-file: - - examples/llama.android/** -server: - - changed-files: - - any-glob-to-any-file: - - tools/server/** -ggml: - - changed-files: - - any-glob-to-any-file: - - ggml/** -nix: - - changed-files: - - any-glob-to-any-file: - - "**/*.nix" - - .github/workflows/nix-*.yml - - .devops/nix/nixpkgs-instances.nix -embedding: - - changed-files: - - any-glob-to-any-file: 
examples/embedding/ - -Ascend NPU: - - changed-files: - - any-glob-to-any-file: - - ggml/include/ggml-cann.h - - ggml/src/ggml-cann/** - - docs/backend/CANN.md -OpenCL: - - changed-files: - - any-glob-to-any-file: - - ggml/include/ggml-opencl.h - - ggml/src/ggml-opencl/** diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md deleted file mode 100644 index d0bdd73c4439c..0000000000000 --- a/.github/pull_request_template.md +++ /dev/null @@ -1 +0,0 @@ -*Make sure to read the [contributing guidelines](https://github.com/ggml-org/llama.cpp/blob/master/CONTRIBUTING.md) before submitting a PR* diff --git a/.github/workflows/bench.yml.disabled b/.github/workflows/bench.yml.disabled deleted file mode 100644 index f2d7e16e981ac..0000000000000 --- a/.github/workflows/bench.yml.disabled +++ /dev/null @@ -1,304 +0,0 @@ -# TODO: there have been some issues with the workflow, so disabling for now -# https://github.com/ggml-org/llama.cpp/issues/7893 -# -# Benchmark -name: Benchmark - -on: - workflow_dispatch: - inputs: - gpu-series: - description: 'Azure GPU series to run with' - required: true - type: choice - options: - - Standard_NC4as_T4_v3 - - Standard_NC24ads_A100_v4 - - Standard_NC80adis_H100_v5 - sha: - description: 'Commit SHA1 to build' - required: false - type: string - duration: - description: 'Duration of the bench' - type: string - default: 10m - - push: - branches: - - master - paths: ['llama.cpp', 'ggml.c', 'ggml-backend.cpp', 'ggml-quants.c', '**/*.cu', 'tools/server/*.h*', 'tools/server/*.cpp'] - pull_request_target: - types: [opened, synchronize, reopened] - paths: ['llama.cpp', 'ggml.c', 'ggml-backend.cpp', 'ggml-quants.c', '**/*.cu', 'tools/server/*.h*', 'tools/server/*.cpp'] - schedule: - - cron: '04 2 * * *' - -concurrency: - group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || github.run_id }}-${{ github.event.inputs.sha }} - cancel-in-progress: true - -jobs: - bench-server-baseline: - runs-on: 
Standard_NC4as_T4_v3 - env: - RUNNER_LABEL: Standard_NC4as_T4_v3 # FIXME Do not find a way to not duplicate it - N_USERS: 8 - DURATION: 10m - - strategy: - matrix: - model: [phi-2] - ftype: [q4_0, q8_0, f16] - include: - - model: phi-2 - ftype: q4_0 - pr_comment_enabled: "true" - - if: | - inputs.gpu-series == 'Standard_NC4as_T4_v3' - || github.event_name == 'pull_request_target' - steps: - - name: Clone - id: checkout - uses: actions/checkout@v4 - with: - fetch-depth: 0 - ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }} - - - name: Install python env - id: pipenv - run: | - cd tools/server/bench - python3 -m venv venv - source venv/bin/activate - pip install -r requirements.txt - - - name: Prometheus - id: install_prometheus - run: | - wget --quiet https://github.com/prometheus/prometheus/releases/download/v2.51.0/prometheus-2.51.0.linux-amd64.tar.gz - tar xzf prometheus*.tar.gz --strip-components=1 - ./prometheus --config.file=tools/server/bench/prometheus.yml & - while ! 
nc -z localhost 9090; do - sleep 0.1 - done - - - name: Set up Go - uses: actions/setup-go@v5 - with: - go-version: '1.21' - - - name: Install k6 and xk6-sse - id: k6_installation - run: | - cd tools/server/bench - go install go.k6.io/xk6/cmd/xk6@latest - xk6 build master \ - --with github.com/phymbert/xk6-sse - - - name: Build - id: cmake_build - run: | - set -eux - cmake -B build \ - -DGGML_NATIVE=OFF \ - -DLLAMA_BUILD_SERVER=ON \ - -DLLAMA_CUBLAS=ON \ - -DCUDAToolkit_ROOT=/usr/local/cuda \ - -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc \ - -DCMAKE_CUDA_ARCHITECTURES=75 \ - -DLLAMA_FATAL_WARNINGS=OFF \ - -DLLAMA_ALL_WARNINGS=OFF \ - -DCMAKE_BUILD_TYPE=Release; - cmake --build build --config Release -j $(nproc) --target llama-server - - - name: Download the dataset - id: download_dataset - run: | - cd tools/server/bench - wget --quiet https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json - - - name: Server bench - id: server_bench - env: - HEAD_REF: ${{ github.head_ref || github.ref_name }} - run: | - set -eux - - cd tools/server/bench - source venv/bin/activate - python bench.py \ - --runner-label ${{ env.RUNNER_LABEL }} \ - --name ${{ github.job }} \ - --branch $HEAD_REF \ - --commit ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha }} \ - --scenario script.js \ - --duration ${{ github.event.inputs.duration || env.DURATION }} \ - --hf-repo ggml-org/models \ - --hf-file ${{ matrix.model }}/ggml-model-${{ matrix.ftype }}.gguf \ - --model-path-prefix /models \ - --parallel ${{ env.N_USERS }} \ - -ngl 33 \ - --batch-size 2048 \ - --ubatch-size 256 \ - --ctx-size 16384 \ - --n-prompts 1000 \ - --max-prompt-tokens 1024 \ - --max-tokens 2048 - - cat results.github.env >> $GITHUB_ENV - - # Remove dataset as we do not want it in the artefact - rm ShareGPT_V3_unfiltered_cleaned_split.json - - - uses: actions/upload-artifact@v4 - with: - name: bench-server-${{ 
github.job }}-${{ env.RUNNER_LABEL }}-${{ matrix.model }}-${{ matrix.ftype }} - compression-level: 9 - path: | - tools/server/bench/*.jpg - tools/server/bench/*.json - tools/server/bench/*.log - - - name: Commit status - uses: Sibz/github-status-action@v1 - with: - authToken: ${{secrets.GITHUB_TOKEN}} - sha: ${{ inputs.sha || github.event.pull_request.head.sha || github.sha }} - context: bench-server-${{ github.job }}-${{ env.RUNNER_LABEL }}-${{ matrix.model }}-${{ matrix.ftype }} - description: | - ${{ env.BENCH_RESULTS }} - state: 'success' - - - name: Upload benchmark images - uses: devicons/public-upload-to-imgur@v2.2.2 - continue-on-error: true # Important as it looks unstable: 503 - id: imgur_step - with: - client_id: ${{secrets.IMGUR_CLIENT_ID}} - path: | - tools/server/bench/prompt_tokens_seconds.jpg - tools/server/bench/predicted_tokens_seconds.jpg - tools/server/bench/kv_cache_usage_ratio.jpg - tools/server/bench/requests_processing.jpg - - - name: Extract mermaid - id: set_mermaid - run: | - set -eux - - cd tools/server/bench - PROMPT_TOKENS_SECONDS=$(cat prompt_tokens_seconds.mermaid) - echo "PROMPT_TOKENS_SECONDS<> $GITHUB_ENV - echo "$PROMPT_TOKENS_SECONDS" >> $GITHUB_ENV - echo "EOF" >> $GITHUB_ENV - - PREDICTED_TOKENS_SECONDS=$(cat predicted_tokens_seconds.mermaid) - echo "PREDICTED_TOKENS_SECONDS<> $GITHUB_ENV - echo "$PREDICTED_TOKENS_SECONDS" >> $GITHUB_ENV - echo "EOF" >> $GITHUB_ENV - - KV_CACHE_USAGE_RATIO=$(cat kv_cache_usage_ratio.mermaid) - echo "KV_CACHE_USAGE_RATIO<> $GITHUB_ENV - echo "$KV_CACHE_USAGE_RATIO" >> $GITHUB_ENV - echo "EOF" >> $GITHUB_ENV - - REQUESTS_PROCESSING=$(cat requests_processing.mermaid) - echo "REQUESTS_PROCESSING<> $GITHUB_ENV - echo "$REQUESTS_PROCESSING" >> $GITHUB_ENV - echo "EOF" >> $GITHUB_ENV - - - name: Extract image url - id: extract_image_url - continue-on-error: true - run: | - set -eux - - echo "IMAGE_O=${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[0] }}" >> $GITHUB_ENV - echo "IMAGE_1=${{ 
fromJSON(steps.imgur_step.outputs.imgur_urls)[1] }}" >> $GITHUB_ENV - echo "IMAGE_2=${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[2] }}" >> $GITHUB_ENV - echo "IMAGE_3=${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[3] }}" >> $GITHUB_ENV - - - name: Comment PR - uses: mshick/add-pr-comment@v2 - id: comment_pr - if: ${{ github.event.pull_request != '' && matrix.pr_comment_enabled == 'true' }} - with: - message-id: bench-server-${{ github.job }}-${{ env.RUNNER_LABEL }}-${{ matrix.model }}-${{ matrix.ftype }} - message: | -

- - 📈 **llama.cpp server** for _${{ github.job }}_ on _${{ env.RUNNER_LABEL }}_ for `${{ matrix.model }}`-`${{ matrix.ftype }}`: **${{ env.BENCH_ITERATIONS}} iterations** 🚀 - -

- -
- - Expand details for performance related PR only - - - Concurrent users: ${{ env.N_USERS }}, duration: ${{ github.event.inputs.duration || env.DURATION }} - - HTTP request : avg=${{ env.HTTP_REQ_DURATION_AVG }}ms p(95)=${{ env.HTTP_REQ_DURATION_P_95_ }}ms fails=${{ env.HTTP_REQ_FAILED_PASSES }}, finish reason: stop=${{ env.LLAMACPP_COMPLETIONS_STOP_RATE_PASSES }} truncated=${{ env.LLAMACPP_COMPLETIONS_TRUNCATED_RATE_PASSES }} - - Prompt processing (pp): avg=${{ env.LLAMACPP_PROMPT_PROCESSING_SECOND_AVG }}tk/s p(95)=${{ env.LLAMACPP_PROMPT_PROCESSING_SECOND_P_95_ }}tk/s - - Token generation (tg): avg=${{ env.LLAMACPP_TOKENS_SECOND_AVG }}tk/s p(95)=${{ env.LLAMACPP_TOKENS_SECOND_P_95_ }}tk/s - - ${{ env.BENCH_GRAPH_XLABEL }} - - -

- - prompt_tokens_seconds - -

- - More - - ```mermaid - ${{ env.PROMPT_TOKENS_SECONDS }} - ``` - -
- - predicted_tokens_seconds - -
- More - - ```mermaid - ${{ env.PREDICTED_TOKENS_SECONDS }} - ``` - -
- -

- -
- - Details - -

- - kv_cache_usage_ratio - -

- More - - ```mermaid - ${{ env.KV_CACHE_USAGE_RATIO }} - ``` - -
- - requests_processing - -
- More - - ```mermaid - ${{ env.REQUESTS_PROCESSING }} - ``` - -
- -

-
-
diff --git a/.github/workflows/build-cmake-pkg.yml b/.github/workflows/build-cmake-pkg.yml deleted file mode 100644 index fee2ab96bd0e8..0000000000000 --- a/.github/workflows/build-cmake-pkg.yml +++ /dev/null @@ -1,51 +0,0 @@ -name: Build relocatable cmake package -on: - workflow_dispatch: - workflow_call: - -jobs: - linux: - runs-on: ubuntu-24.04 - steps: - - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - - name: Install dependencies - run: | - sudo apt update - sudo apt install -y build-essential tcl - - - name: Build - run: | - PREFIX="$(pwd)"/inst - cmake -S . -B build -DCMAKE_PREFIX_PATH="$PREFIX" \ - -DLLAMA_CURL=OFF -DLLAMA_BUILD_TESTS=OFF -DLLAMA_BUILD_TOOLS=OFF \ - -DLLAMA_BUILD_EXAMPLES=OFF -DCMAKE_BUILD_TYPE=Release - cmake --build build --config Release - cmake --install build --prefix "$PREFIX" --config Release - - export LLAMA_CONFIG="$PREFIX"/lib/cmake/llama/llama-config.cmake - tclsh <<'EOF' - set build(commit) [string trim [exec git rev-parse --short HEAD]] - set build(number) [string trim [exec git rev-list --count HEAD]] - set build(version) "0.0.$build(number)" - - set llamaconfig [read [open "$env(LLAMA_CONFIG)" r]] - set checks [list "set\\(LLAMA_VERSION \\s+$build(version)\\)" \ - "set\\(LLAMA_BUILD_COMMIT\\s+$build(commit)\\)" \ - "set\\(LLAMA_BUILD_NUMBER\\s+$build(number)\\)"] - - puts -nonewline "Checking llama-config.cmake version... " - foreach check $checks { - if {![regexp -expanded -- $check $llamaconfig]} { - puts "\"$check\" failed!" - exit 1 - } - } - puts "success." - EOF - - cd examples/simple-cmake-pkg - cmake -S . 
-B build -DCMAKE_PREFIX_PATH="$PREFIX"/lib/cmake - cmake --build build diff --git a/.github/workflows/build-linux-cross.yml b/.github/workflows/build-linux-cross.yml deleted file mode 100644 index 04ad187d35c09..0000000000000 --- a/.github/workflows/build-linux-cross.yml +++ /dev/null @@ -1,346 +0,0 @@ -name: Build on Linux using cross-compiler -on: - workflow_dispatch: - workflow_call: - -jobs: - ubuntu-24-riscv64-cpu-cross: - runs-on: ubuntu-24.04 - - steps: - - uses: actions/checkout@v4 - - name: Setup Riscv - run: | - sudo dpkg --add-architecture riscv64 - - # Add arch-specific repositories for non-amd64 architectures - cat << EOF | sudo tee /etc/apt/sources.list.d/riscv64-ports.list - deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble main universe - deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble-updates main universe - deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble-security main universe - deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble-backports main universe - EOF - - sudo apt-get update || true ;# Prevent failure due to missing URLs. 
- - sudo apt-get install -y --no-install-recommends \ - build-essential \ - gcc-14-riscv64-linux-gnu \ - g++-14-riscv64-linux-gnu - - - name: Build - run: | - cmake -B build -DLLAMA_CURL=OFF \ - -DCMAKE_BUILD_TYPE=Release \ - -DGGML_OPENMP=OFF \ - -DLLAMA_BUILD_EXAMPLES=ON \ - -DLLAMA_BUILD_TOOLS=ON \ - -DLLAMA_BUILD_TESTS=OFF \ - -DCMAKE_SYSTEM_NAME=Linux \ - -DCMAKE_SYSTEM_PROCESSOR=riscv64 \ - -DCMAKE_C_COMPILER=riscv64-linux-gnu-gcc-14 \ - -DCMAKE_CXX_COMPILER=riscv64-linux-gnu-g++-14 \ - -DCMAKE_POSITION_INDEPENDENT_CODE=ON \ - -DCMAKE_FIND_ROOT_PATH=/usr/lib/riscv64-linux-gnu \ - -DCMAKE_FIND_ROOT_PATH_MODE_PROGRAM=NEVER \ - -DCMAKE_FIND_ROOT_PATH_MODE_LIBRARY=ONLY \ - -DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH - - cmake --build build --config Release -j $(nproc) - - # ubuntu-24-riscv64-vulkan-cross: - # runs-on: ubuntu-24.04 - - # steps: - # - uses: actions/checkout@v4 - # - name: Setup Riscv - # run: | - # sudo dpkg --add-architecture riscv64 - - # # Add arch-specific repositories for non-amd64 architectures - # cat << EOF | sudo tee /etc/apt/sources.list.d/riscv64-ports.list - # deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble main universe - # deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble-updates main universe - # deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble-security main universe - # deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble-backports main universe - # EOF - - # sudo apt-get update || true ;# Prevent failure due to missing URLs. 
- - # sudo apt-get install -y --no-install-recommends \ - # build-essential \ - # glslc \ - # gcc-14-riscv64-linux-gnu \ - # g++-14-riscv64-linux-gnu \ - # libvulkan-dev:riscv64 - - # - name: Build - # run: | - # cmake -B build -DLLAMA_CURL=OFF \ - # -DCMAKE_BUILD_TYPE=Release \ - # -DGGML_VULKAN=ON \ - # -DGGML_OPENMP=OFF \ - # -DLLAMA_BUILD_EXAMPLES=ON \ - # -DLLAMA_BUILD_TOOLS=ON \ - # -DLLAMA_BUILD_TESTS=OFF \ - # -DCMAKE_SYSTEM_NAME=Linux \ - # -DCMAKE_SYSTEM_PROCESSOR=riscv64 \ - # -DCMAKE_C_COMPILER=riscv64-linux-gnu-gcc-14 \ - # -DCMAKE_CXX_COMPILER=riscv64-linux-gnu-g++-14 \ - # -DCMAKE_POSITION_INDEPENDENT_CODE=ON \ - # -DCMAKE_FIND_ROOT_PATH=/usr/lib/riscv64-linux-gnu \ - # -DCMAKE_FIND_ROOT_PATH_MODE_PROGRAM=NEVER \ - # -DCMAKE_FIND_ROOT_PATH_MODE_LIBRARY=ONLY \ - # -DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH - - # cmake --build build --config Release -j $(nproc) - - # ubuntu-24-arm64-vulkan-cross: - # runs-on: ubuntu-24.04 - - # steps: - # - uses: actions/checkout@v4 - # - name: Setup Arm64 - # run: | - # sudo dpkg --add-architecture arm64 - - # # Add arch-specific repositories for non-amd64 architectures - # cat << EOF | sudo tee /etc/apt/sources.list.d/arm64-ports.list - # deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports/ noble main universe - # deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports/ noble-updates main universe - # deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports/ noble-security main universe - # deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports/ noble-backports main universe - # EOF - - # sudo apt-get update || true ;# Prevent failure due to missing URLs. 
- - # sudo apt-get install -y --no-install-recommends \ - # build-essential \ - # glslc \ - # crossbuild-essential-arm64 \ - # libvulkan-dev:arm64 - - # - name: Build - # run: | - # cmake -B build -DLLAMA_CURL=OFF \ - # -DCMAKE_BUILD_TYPE=Release \ - # -DGGML_VULKAN=ON \ - # -DGGML_OPENMP=OFF \ - # -DLLAMA_BUILD_EXAMPLES=ON \ - # -DLLAMA_BUILD_TOOLS=ON \ - # -DLLAMA_BUILD_TESTS=OFF \ - # -DCMAKE_SYSTEM_NAME=Linux \ - # -DCMAKE_SYSTEM_PROCESSOR=aarch64 \ - # -DCMAKE_C_COMPILER=aarch64-linux-gnu-gcc \ - # -DCMAKE_CXX_COMPILER=aarch64-linux-gnu-g++ \ - # -DCMAKE_POSITION_INDEPENDENT_CODE=ON \ - # -DCMAKE_FIND_ROOT_PATH=/usr/lib/aarch64-linux-gnu \ - # -DCMAKE_FIND_ROOT_PATH_MODE_PROGRAM=NEVER \ - # -DCMAKE_FIND_ROOT_PATH_MODE_LIBRARY=ONLY \ - # -DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH - - # cmake --build build --config Release -j $(nproc) - - ubuntu-24-ppc64el-cpu-cross: - runs-on: ubuntu-24.04 - - steps: - - uses: actions/checkout@v4 - - name: Setup PowerPC64le - run: | - sudo dpkg --add-architecture ppc64el - - # Add arch-specific repositories for non-amd64 architectures - cat << EOF | sudo tee /etc/apt/sources.list.d/ppc64el-ports.list - deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble main universe - deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble-updates main universe - deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble-security main universe - deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble-backports main universe - EOF - - sudo apt-get update || true ;# Prevent failure due to missing URLs. 
- - sudo apt-get install -y --no-install-recommends \ - build-essential \ - gcc-14-powerpc64le-linux-gnu \ - g++-14-powerpc64le-linux-gnu - - - name: Build - run: | - cmake -B build -DLLAMA_CURL=OFF \ - -DCMAKE_BUILD_TYPE=Release \ - -DGGML_OPENMP=OFF \ - -DLLAMA_BUILD_EXAMPLES=ON \ - -DLLAMA_BUILD_TOOLS=ON \ - -DLLAMA_BUILD_TESTS=OFF \ - -DCMAKE_SYSTEM_NAME=Linux \ - -DCMAKE_SYSTEM_PROCESSOR=ppc64 \ - -DCMAKE_C_COMPILER=powerpc64le-linux-gnu-gcc-14 \ - -DCMAKE_CXX_COMPILER=powerpc64le-linux-gnu-g++-14 \ - -DCMAKE_POSITION_INDEPENDENT_CODE=ON \ - -DCMAKE_FIND_ROOT_PATH=/usr/lib/powerpc64le-linux-gnu \ - -DCMAKE_FIND_ROOT_PATH_MODE_PROGRAM=NEVER \ - -DCMAKE_FIND_ROOT_PATH_MODE_LIBRARY=ONLY \ - -DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH - - cmake --build build --config Release -j $(nproc) - - # ubuntu-24-ppc64el-vulkan-cross: - # runs-on: ubuntu-24.04 - - # steps: - # - uses: actions/checkout@v4 - # - name: Setup PowerPC64le - # run: | - # sudo dpkg --add-architecture ppc64el - - # # Add arch-specific repositories for non-amd64 architectures - # cat << EOF | sudo tee /etc/apt/sources.list.d/ppc64el-ports.list - # deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble main universe - # deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble-updates main universe - # deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble-security main universe - # deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble-backports main universe - # EOF - - # sudo apt-get update || true ;# Prevent failure due to missing URLs. 
- - # sudo apt-get install -y --no-install-recommends \ - # build-essential \ - # glslc \ - # gcc-14-powerpc64le-linux-gnu \ - # g++-14-powerpc64le-linux-gnu \ - # libvulkan-dev:ppc64el - - # - name: Build - # run: | - # cmake -B build -DLLAMA_CURL=OFF \ - # -DCMAKE_BUILD_TYPE=Release \ - # -DGGML_VULKAN=ON \ - # -DGGML_OPENMP=OFF \ - # -DLLAMA_BUILD_EXAMPLES=ON \ - # -DLLAMA_BUILD_TOOLS=ON \ - # -DLLAMA_BUILD_TESTS=OFF \ - # -DCMAKE_SYSTEM_NAME=Linux \ - # -DCMAKE_SYSTEM_PROCESSOR=ppc64 \ - # -DCMAKE_C_COMPILER=powerpc64le-linux-gnu-gcc-14 \ - # -DCMAKE_CXX_COMPILER=powerpc64le-linux-gnu-g++-14 \ - # -DCMAKE_POSITION_INDEPENDENT_CODE=ON \ - # -DCMAKE_FIND_ROOT_PATH=/usr/lib/powerpc64le-linux-gnu \ - # -DCMAKE_FIND_ROOT_PATH_MODE_PROGRAM=NEVER \ - # -DCMAKE_FIND_ROOT_PATH_MODE_LIBRARY=ONLY \ - # -DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH - - # cmake --build build --config Release -j $(nproc) - - debian-13-loongarch64-cpu-cross: - runs-on: ubuntu-24.04 - container: debian@sha256:653dfb9f86c3782e8369d5f7d29bb8faba1f4bff9025db46e807fa4c22903671 - - steps: - - uses: actions/checkout@v4 - - name: Setup LoongArch - run: | - rm -f /etc/apt/sources.list.d/* - cat << EOF | tee /etc/apt/sources.list.d/debian-ports.list - deb http://snapshot.debian.org/archive/debian/20250515T202920Z/ trixie main - EOF - ( echo 'quiet "true";'; \ - echo 'APT::Get::Assume-Yes "true";'; \ - echo 'APT::Install-Recommends "false";'; \ - echo 'Acquire::Check-Valid-Until "false";'; \ - echo 'Acquire::Retries "5";'; \ - ) > /etc/apt/apt.conf.d/99snapshot-repos - - apt-get update - apt-get install -y ca-certificates debian-ports-archive-keyring cmake git zip - dpkg --add-architecture loong64 - - # Add arch-specific repositories for non-amd64 architectures - cat << EOF | tee /etc/apt/sources.list.d/loong64-ports.list - deb [arch=loong64] http://snapshot.debian.org/archive/debian-ports/20250515T194251Z/ sid main - EOF - - apt-get update || true ;# Prevent failure due to missing URLs. 
- - apt-get install -y --no-install-recommends \ - build-essential \ - gcc-14-loongarch64-linux-gnu \ - g++-14-loongarch64-linux-gnu - - - name: Build - run: | - cmake -B build -DLLAMA_CURL=OFF \ - -DCMAKE_BUILD_TYPE=Release \ - -DGGML_OPENMP=OFF \ - -DLLAMA_BUILD_EXAMPLES=ON \ - -DLLAMA_BUILD_TOOLS=ON \ - -DLLAMA_BUILD_TESTS=OFF \ - -DCMAKE_SYSTEM_NAME=Linux \ - -DCMAKE_SYSTEM_PROCESSOR=loongarch64 \ - -DCMAKE_C_COMPILER=loongarch64-linux-gnu-gcc-14 \ - -DCMAKE_CXX_COMPILER=loongarch64-linux-gnu-g++-14 \ - -DCMAKE_POSITION_INDEPENDENT_CODE=ON \ - -DCMAKE_FIND_ROOT_PATH=/usr/lib/loongarch64-linux-gnu \ - -DCMAKE_FIND_ROOT_PATH_MODE_PROGRAM=NEVER \ - -DCMAKE_FIND_ROOT_PATH_MODE_LIBRARY=ONLY \ - -DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH - - cmake --build build --config Release -j $(nproc) - - debian-13-loongarch64-vulkan-cross: - runs-on: ubuntu-24.04 - container: debian@sha256:653dfb9f86c3782e8369d5f7d29bb8faba1f4bff9025db46e807fa4c22903671 - - steps: - - uses: actions/checkout@v4 - - name: Setup LoongArch - run: | - rm -f /etc/apt/sources.list.d/* - cat << EOF | tee /etc/apt/sources.list.d/debian-ports.list - deb http://snapshot.debian.org/archive/debian/20250515T202920Z/ trixie main - EOF - ( echo 'quiet "true";'; \ - echo 'APT::Get::Assume-Yes "true";'; \ - echo 'APT::Install-Recommends "false";'; \ - echo 'Acquire::Check-Valid-Until "false";'; \ - echo 'Acquire::Retries "5";'; \ - ) > /etc/apt/apt.conf.d/99snapshot-repos - - apt-get update - apt-get install -y ca-certificates debian-ports-archive-keyring cmake git zip - dpkg --add-architecture loong64 - - # Add arch-specific repositories for non-amd64 architectures - cat << EOF | tee /etc/apt/sources.list.d/loong64-ports.list - deb [arch=loong64] http://snapshot.debian.org/archive/debian-ports/20250515T194251Z/ sid main - EOF - - apt-get update || true ;# Prevent failure due to missing URLs. 
- - apt-get install -y --no-install-recommends \ - build-essential \ - glslc \ - gcc-14-loongarch64-linux-gnu \ - g++-14-loongarch64-linux-gnu \ - libvulkan-dev:loong64 - - - name: Build - run: | - cmake -B build -DLLAMA_CURL=OFF \ - -DCMAKE_BUILD_TYPE=Release \ - -DGGML_VULKAN=ON \ - -DGGML_OPENMP=OFF \ - -DLLAMA_BUILD_EXAMPLES=ON \ - -DLLAMA_BUILD_TOOLS=ON \ - -DLLAMA_BUILD_TESTS=OFF \ - -DCMAKE_SYSTEM_NAME=Linux \ - -DCMAKE_SYSTEM_PROCESSOR=loongarch64 \ - -DCMAKE_C_COMPILER=loongarch64-linux-gnu-gcc-14 \ - -DCMAKE_CXX_COMPILER=loongarch64-linux-gnu-g++-14 \ - -DCMAKE_POSITION_INDEPENDENT_CODE=ON \ - -DCMAKE_FIND_ROOT_PATH=/usr/lib/loongarch64-linux-gnu \ - -DCMAKE_FIND_ROOT_PATH_MODE_PROGRAM=NEVER \ - -DCMAKE_FIND_ROOT_PATH_MODE_LIBRARY=ONLY \ - -DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH - - cmake --build build --config Release -j $(nproc) diff --git a/.github/workflows/build-riscv-native.yml b/.github/workflows/build-riscv-native.yml deleted file mode 100644 index 86dc0ff76e59c..0000000000000 --- a/.github/workflows/build-riscv-native.yml +++ /dev/null @@ -1,60 +0,0 @@ -name: Build on RISCV Linux Machine by Cloud-V -on: - pull_request: - workflow_dispatch: - workflow_call: - -jobs: - debian-13-riscv64-native: # Bianbu 2.2 - runs-on: self-hosted - - steps: - - name: Install prerequisites - run: | - sudo apt-get update || true - sudo apt-get install -y libatomic1 - - uses: actions/checkout@v4 - - name: Setup Riscv - run: | - sudo apt-get update || true - sudo apt-get install -y --no-install-recommends \ - build-essential \ - gcc-14-riscv64-linux-gnu \ - g++-14-riscv64-linux-gnu \ - ccache \ - cmake - - - name: Setup ccache - run: | - mkdir -p $HOME/.ccache - ccache -M 5G -d $HOME/.ccache - export CCACHE_LOGFILE=/home/runneruser/ccache_debug/ccache.log - export CCACHE_DEBUGDIR="/home/runneruser/ccache_debug" - echo "$GITHUB_WORKSPACE" - echo "CCACHE_LOGFILE=$CCACHE_LOGFILE" >> $GITHUB_ENV - echo "CCACHE_DEBUGDIR=$CCACHE_DEBUGDIR" >> $GITHUB_ENV - echo 
"CCACHE_BASEDIR=$GITHUB_WORKSPACE" >> $GITHUB_ENV - echo "CCACHE_DIR=$HOME/.ccache" >> $GITHUB_ENV - - - name: Build - run: | - cmake -B build \ - -DLLAMA_CURL=OFF \ - -DCMAKE_BUILD_TYPE=Release \ - -DGGML_OPENMP=OFF \ - -DLLAMA_BUILD_EXAMPLES=ON \ - -DLLAMA_BUILD_TOOLS=ON \ - -DLLAMA_BUILD_TESTS=OFF \ - -DCMAKE_SYSTEM_NAME=Linux \ - -DCMAKE_SYSTEM_PROCESSOR=riscv64 \ - -DCMAKE_C_COMPILER=riscv64-linux-gnu-gcc-14 \ - -DCMAKE_CXX_COMPILER=riscv64-linux-gnu-g++-14 \ - -DCMAKE_C_COMPILER_LAUNCHER=ccache \ - -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ - -DCMAKE_POSITION_INDEPENDENT_CODE=ON \ - -DCMAKE_FIND_ROOT_PATH=/usr/lib/riscv64-linux-gnu \ - -DCMAKE_FIND_ROOT_PATH_MODE_PROGRAM=NEVER \ - -DCMAKE_FIND_ROOT_PATH_MODE_LIBRARY=ONLY \ - -DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH - - cmake --build build --config Release -j $(nproc) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml deleted file mode 100644 index 43553ac13bdf6..0000000000000 --- a/.github/workflows/build.yml +++ /dev/null @@ -1,1209 +0,0 @@ -name: CI - -on: - workflow_dispatch: # allows manual triggering - push: - branches: - - master - paths: [ - '.github/workflows/build.yml', - '.github/workflows/build-linux-cross.yml', - '.github/workflows/build-cmake-pkg.yml', - '**/CMakeLists.txt', - '**/.cmake', - '**/*.h', - '**/*.hpp', - '**/*.c', - '**/*.cpp', - '**/*.cu', - '**/*.cuh', - '**/*.swift', - '**/*.m', - '**/*.metal', - '**/*.comp' - ] - - pull_request: - types: [opened, synchronize, reopened] - paths: [ - '.github/workflows/build.yml', - '.github/workflows/build-linux-cross.yml', - '.github/workflows/build-cmake-pkg.yml', - '**/CMakeLists.txt', - '**/.cmake', - '**/*.h', - '**/*.hpp', - '**/*.c', - '**/*.cpp', - '**/*.cu', - '**/*.cuh', - '**/*.swift', - '**/*.m', - '**/*.metal', - '**/*.comp' - ] - -concurrency: - group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }} - cancel-in-progress: true - -env: - GGML_NLOOP: 3 - GGML_N_THREADS: 1 - LLAMA_LOG_COLORS: 
1 - LLAMA_LOG_PREFIX: 1 - LLAMA_LOG_TIMESTAMPS: 1 - -jobs: - macOS-latest-cmake-arm64: - runs-on: macos-14 - - steps: - - name: Clone - id: checkout - uses: actions/checkout@v4 - - - name: ccache - uses: ggml-org/ccache-action@v1.2.16 - with: - key: macOS-latest-cmake-arm64 - evict-old-files: 1d - - - name: Dependencies - id: depends - continue-on-error: true - run: | - brew update - brew install curl - - - name: Build - id: cmake_build - run: | - sysctl -a - cmake -B build \ - -DCMAKE_BUILD_RPATH="@loader_path" \ - -DLLAMA_FATAL_WARNINGS=ON \ - -DGGML_METAL_USE_BF16=ON \ - -DGGML_METAL_EMBED_LIBRARY=OFF \ - -DGGML_METAL_SHADER_DEBUG=ON \ - -DGGML_RPC=ON - cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) - - - name: Test - id: cmake_test - run: | - cd build - ctest -L 'main|curl' --verbose --timeout 900 - - macOS-latest-cmake-x64: - runs-on: macos-13 - - steps: - - name: Clone - id: checkout - uses: actions/checkout@v4 - - - name: ccache - uses: ggml-org/ccache-action@v1.2.16 - with: - key: macOS-latest-cmake-x64 - evict-old-files: 1d - - - name: Dependencies - id: depends - continue-on-error: true - run: | - brew update - brew install curl - - - name: Build - id: cmake_build - run: | - sysctl -a - # Metal is disabled due to intermittent failures with Github runners not having a GPU: - # https://github.com/ggml-org/llama.cpp/actions/runs/8635935781/job/23674807267#step:5:2313 - cmake -B build \ - -DCMAKE_BUILD_RPATH="@loader_path" \ - -DLLAMA_FATAL_WARNINGS=ON \ - -DGGML_METAL=OFF \ - -DGGML_RPC=ON - cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) - - - name: Test - id: cmake_test - run: | - cd build - ctest -L main --verbose --timeout 900 - - macOS-latest-cmake-arm64-webgpu: - runs-on: macos-14 - - steps: - - name: Clone - id: checkout - uses: actions/checkout@v4 - - - name: ccache - uses: ggml-org/ccache-action@v1.2.16 - with: - key: macOS-latest-cmake-arm64-webgpu - evict-old-files: 1d - - - name: Dependencies - id: depends - 
continue-on-error: true - run: | - brew update - brew install curl - - - name: Dawn Dependency - id: dawn-depends - run: | - DAWN_VERSION="v1.0.0" - DAWN_OWNER="reeselevine" - DAWN_REPO="dawn" - DAWN_ASSET_NAME="Dawn-a1a6b45cced25a3b7f4fb491e0ae70796cc7f22b-macos-latest-Release.tar.gz" - echo "Fetching release asset from https://github.com/${DAWN_OWNER}/${DAWN_REPO}/releases/download/${DAWN_VERSION}/${DAWN_ASSET_NAME}" - curl -L -o artifact.tar.gz \ - "https://github.com/${DAWN_OWNER}/${DAWN_REPO}/releases/download/${DAWN_VERSION}/${DAWN_ASSET_NAME}" - mkdir dawn - tar -xvf artifact.tar.gz -C dawn --strip-components=1 - - - name: Build - id: cmake_build - run: | - export CMAKE_PREFIX_PATH=dawn - cmake -B build -DGGML_WEBGPU=ON -DGGML_METAL=OFF -DGGML_BLAS=OFF - cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) - - - name: Test - id: cmake_test - run: | - cd build - ctest -L main --verbose --timeout 900 - - ubuntu-cpu-cmake: - strategy: - matrix: - include: - - build: 'x64' - os: ubuntu-22.04 - - build: 'arm64' - os: ubuntu-22.04-arm - - runs-on: ${{ matrix.os }} - - steps: - - name: Clone - id: checkout - uses: actions/checkout@v4 - - - name: ccache - uses: ggml-org/ccache-action@v1.2.16 - with: - key: ubuntu-cpu-cmake - evict-old-files: 1d - - - name: Dependencies - id: depends - run: | - sudo apt-get update - sudo apt-get install build-essential libcurl4-openssl-dev - - - name: Build - id: cmake_build - run: | - cmake -B build \ - -DLLAMA_FATAL_WARNINGS=ON \ - -DGGML_RPC=ON - cmake --build build --config Release -j $(nproc) - - - name: Test - id: cmake_test - run: | - cd build - ctest -L 'main|curl' --verbose --timeout 900 - - - name: Test llama2c conversion - id: llama2c_test - run: | - cd build - echo "Fetch tokenizer" - wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories260K/tok512.bin - echo "Fetch llama2c model" - wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories260K/stories260K.bin - 
./bin/llama-convert-llama2c-to-ggml --copy-vocab-from-model ./tok512.bin --llama2c-model stories260K.bin --llama2c-output-model stories260K.gguf - ./bin/llama-cli -m stories260K.gguf -p "One day, Lily met a Shoggoth" -n 500 -c 256 - - ubuntu-latest-cmake-sanitizer: - runs-on: ubuntu-latest - - continue-on-error: true - - strategy: - matrix: - sanitizer: [ADDRESS, THREAD, UNDEFINED] - build_type: [Debug] - - steps: - - name: Clone - id: checkout - uses: actions/checkout@v4 - - - name: ccache - uses: ggml-org/ccache-action@v1.2.16 - with: - key: ubuntu-latest-cmake-sanitizer-${{ matrix.sanitizer }} - evict-old-files: 1d - - - name: Dependencies - id: depends - run: | - sudo apt-get update - sudo apt-get install build-essential libcurl4-openssl-dev - - - name: Build - id: cmake_build - if: ${{ matrix.sanitizer != 'THREAD' }} - run: | - cmake -B build \ - -DLLAMA_FATAL_WARNINGS=ON \ - -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON \ - -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} - cmake --build build --config ${{ matrix.build_type }} -j $(nproc) - - - name: Build (no OpenMP) - id: cmake_build_no_openmp - if: ${{ matrix.sanitizer == 'THREAD' }} - run: | - cmake -B build \ - -DLLAMA_FATAL_WARNINGS=ON \ - -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON \ - -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \ - -DGGML_OPENMP=OFF - cmake --build build --config ${{ matrix.build_type }} -j $(nproc) - - - name: Test - id: cmake_test - run: | - cd build - ctest -L main --verbose --timeout 900 - - ubuntu-latest-llguidance: - runs-on: ubuntu-latest - - steps: - - name: Clone - id: checkout - uses: actions/checkout@v4 - - - name: Dependencies - id: depends - run: | - sudo apt-get update - sudo apt-get install build-essential libcurl4-openssl-dev - - - name: Build - id: cmake_build - run: | - mkdir build - cd build - cmake .. \ - -DLLAMA_FATAL_WARNINGS=ON \ - -DLLAMA_LLGUIDANCE=ON - cmake --build . 
--config Release -j $(nproc) - - - name: Test - id: cmake_test - run: | - cd build - ctest -L main --verbose --timeout 900 - - ubuntu-latest-cmake-rpc: - runs-on: ubuntu-latest - - continue-on-error: true - - steps: - - name: Clone - id: checkout - uses: actions/checkout@v4 - - - name: ccache - uses: ggml-org/ccache-action@v1.2.16 - with: - key: ubuntu-latest-cmake-rpc - evict-old-files: 1d - - - name: Dependencies - id: depends - run: | - sudo apt-get update - sudo apt-get install build-essential libcurl4-openssl-dev - - - name: Build - id: cmake_build - run: | - cmake -B build \ - -DGGML_RPC=ON - cmake --build build --config Release -j $(nproc) - - - name: Test - id: cmake_test - run: | - cd build - ctest -L main --verbose - - ubuntu-22-cmake-vulkan: - runs-on: ubuntu-22.04 - - steps: - - name: Clone - id: checkout - uses: actions/checkout@v4 - - - name: ccache - uses: ggml-org/ccache-action@v1.2.16 - with: - key: ubuntu-22-cmake-vulkan - evict-old-files: 1d - - - name: Dependencies - id: depends - run: | - wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo apt-key add - - sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list - sudo apt-get update -y - sudo apt-get install -y build-essential mesa-vulkan-drivers vulkan-sdk libcurl4-openssl-dev - - - name: Build - id: cmake_build - run: | - cmake -B build \ - -DGGML_VULKAN=ON - cmake --build build --config Release -j $(nproc) - - - name: Test - id: cmake_test - run: | - cd build - export GGML_VK_VISIBLE_DEVICES=0 - # This is using llvmpipe and runs slower than other backends - ctest -L main --verbose --timeout 4200 - - ubuntu-22-cmake-webgpu: - runs-on: ubuntu-22.04 - - steps: - - name: Clone - id: checkout - uses: actions/checkout@v4 - - - name: ccache - uses: ggml-org/ccache-action@v1.2.16 - with: - key: ubuntu-22-cmake-webgpu - evict-old-files: 1d - - - name: Vulkan SDK Dependencies - id: vulkan-depends - run: | - wget -qO 
- https://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo apt-key add - - sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list - sudo apt-get update -y - sudo apt-get install -y build-essential mesa-vulkan-drivers vulkan-sdk libcurl4-openssl-dev - - - name: Dawn Dependency - id: dawn-depends - run: | - sudo apt-get install -y libxrandr-dev libxinerama-dev libxcursor-dev mesa-common-dev libx11-xcb-dev libxi-dev - DAWN_VERSION="v1.0.0" - DAWN_OWNER="reeselevine" - DAWN_REPO="dawn" - DAWN_ASSET_NAME="Dawn-a1a6b45cced25a3b7f4fb491e0ae70796cc7f22b-ubuntu-latest-Release.tar.gz" - echo "Fetching release asset from https://github.com/${DAWN_OWNER}/${DAWN_REPO}/releases/download/${DAWN_VERSION}/${DAWN_ASSET_NAME}" - curl -L -o artifact.tar.gz \ - "https://github.com/${DAWN_OWNER}/${DAWN_REPO}/releases/download/${DAWN_VERSION}/${DAWN_ASSET_NAME}" - mkdir dawn - tar -xvf artifact.tar.gz -C dawn --strip-components=1 - - - name: Build - id: cmake_build - run: | - export Dawn_DIR=dawn/lib64/cmake/Dawn - cmake -B build -DGGML_WEBGPU=ON - cmake --build build --config Release -j $(nproc) - - - name: Test - id: cmake_test - run: | - cd build - # This is using llvmpipe and runs slower than other backends - ctest -L main --verbose --timeout 3600 - - ubuntu-22-cmake-hip: - runs-on: ubuntu-22.04 - container: rocm/dev-ubuntu-22.04:6.1.2 - - steps: - - name: Clone - id: checkout - uses: actions/checkout@v4 - - - name: Dependencies - id: depends - run: | - sudo apt-get update - sudo apt-get install -y build-essential git cmake rocblas-dev hipblas-dev libcurl4-openssl-dev - - - name: ccache - uses: ggml-org/ccache-action@v1.2.16 - with: - key: ubuntu-22-cmake-hip - evict-old-files: 1d - - - name: Build with native CMake HIP support - id: cmake_build - run: | - cmake -B build -S . 
\ - -DCMAKE_HIP_COMPILER="$(hipconfig -l)/clang" \ - -DGGML_HIP_ROCWMMA_FATTN=ON \ - -DGGML_HIP=ON - cmake --build build --config Release -j $(nproc) - - ubuntu-22-cmake-musa: - runs-on: ubuntu-22.04 - container: mthreads/musa:rc4.2.0-devel-ubuntu22.04-amd64 - - steps: - - name: Clone - id: checkout - uses: actions/checkout@v4 - - - name: Dependencies - id: depends - run: | - apt-get update - apt-get install -y build-essential git cmake libcurl4-openssl-dev - - - name: ccache - uses: ggml-org/ccache-action@v1.2.16 - with: - key: ubuntu-22-cmake-musa - evict-old-files: 1d - - - name: Build with native CMake MUSA support - id: cmake_build - run: | - cmake -B build -S . \ - -DGGML_MUSA=ON - cmake --build build --config Release -j $(nproc) - - ubuntu-22-cmake-sycl: - runs-on: ubuntu-22.04 - - continue-on-error: true - - steps: - - uses: actions/checkout@v4 - - - name: add oneAPI to apt - shell: bash - run: | - cd /tmp - wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB - sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB - rm GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB - sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main" - - - name: install oneAPI dpcpp compiler - shell: bash - run: | - sudo apt update - sudo apt install intel-oneapi-compiler-dpcpp-cpp libcurl4-openssl-dev - - - name: install oneAPI MKL library - shell: bash - run: | - sudo apt install intel-oneapi-mkl-devel - - - name: Clone - id: checkout - uses: actions/checkout@v4 - - - name: ccache - uses: ggml-org/ccache-action@v1.2.16 - with: - key: ubuntu-22-cmake-sycl - evict-old-files: 1d - - - name: Build - id: cmake_build - run: | - source /opt/intel/oneapi/setvars.sh - cmake -B build \ - -DGGML_SYCL=ON \ - -DCMAKE_C_COMPILER=icx \ - -DCMAKE_CXX_COMPILER=icpx - cmake --build build --config Release -j $(nproc) - - ubuntu-22-cmake-sycl-fp16: - runs-on: ubuntu-22.04 - - continue-on-error: true - - steps: - - uses: actions/checkout@v4 - - - name: add oneAPI to apt - 
shell: bash - run: | - cd /tmp - wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB - sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB - rm GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB - sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main" - - - name: install oneAPI dpcpp compiler - shell: bash - run: | - sudo apt update - sudo apt install intel-oneapi-compiler-dpcpp-cpp libcurl4-openssl-dev - - - name: install oneAPI MKL library - shell: bash - run: | - sudo apt install intel-oneapi-mkl-devel - - - name: Clone - id: checkout - uses: actions/checkout@v4 - - - name: ccache - uses: ggml-org/ccache-action@v1.2.16 - with: - key: ubuntu-22-cmake-sycl-fp16 - evict-old-files: 1d - - - name: Build - id: cmake_build - run: | - source /opt/intel/oneapi/setvars.sh - cmake -B build \ - -DGGML_SYCL=ON \ - -DCMAKE_C_COMPILER=icx \ - -DCMAKE_CXX_COMPILER=icpx \ - -DGGML_SYCL_F16=ON - cmake --build build --config Release -j $(nproc) - - build-linux-cross: - uses: ./.github/workflows/build-linux-cross.yml - - build-cmake-pkg: - uses: ./.github/workflows/build-cmake-pkg.yml - - macOS-latest-cmake-ios: - runs-on: macos-latest - - steps: - - name: Clone - id: checkout - uses: actions/checkout@v4 - - - name: ccache - uses: ggml-org/ccache-action@v1.2.16 - with: - key: macOS-latest-cmake-ios - evict-old-files: 1d - - - name: Dependencies - id: depends - continue-on-error: true - run: | - brew update - - - name: Build - id: cmake_build - run: | - sysctl -a - cmake -B build -G Xcode \ - -DGGML_METAL_USE_BF16=ON \ - -DGGML_METAL_EMBED_LIBRARY=ON \ - -DLLAMA_BUILD_COMMON=OFF \ - -DLLAMA_BUILD_EXAMPLES=OFF \ - -DLLAMA_BUILD_TOOLS=OFF \ - -DLLAMA_BUILD_TESTS=OFF \ - -DLLAMA_BUILD_SERVER=OFF \ - -DCMAKE_SYSTEM_NAME=iOS \ - -DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \ - -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml - cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO - - macOS-latest-cmake-tvos: - runs-on: 
macos-latest - - steps: - - name: Clone - id: checkout - uses: actions/checkout@v4 - - - name: ccache - uses: ggml-org/ccache-action@v1.2.16 - with: - key: macOS-latest-cmake-tvos - evict-old-files: 1d - - - name: Dependencies - id: depends - continue-on-error: true - run: | - brew update - - - name: Build - id: cmake_build - run: | - sysctl -a - cmake -B build -G Xcode \ - -DGGML_METAL_USE_BF16=ON \ - -DGGML_METAL_EMBED_LIBRARY=ON \ - -DLLAMA_BUILD_COMMON=OFF \ - -DLLAMA_BUILD_EXAMPLES=OFF \ - -DLLAMA_BUILD_TOOLS=OFF \ - -DLLAMA_BUILD_TESTS=OFF \ - -DLLAMA_BUILD_SERVER=OFF \ - -DCMAKE_SYSTEM_NAME=tvOS \ - -DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \ - -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml - cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO - - macOS-latest-cmake-visionos: - runs-on: macos-latest - - steps: - - name: Clone - id: checkout - uses: actions/checkout@v4 - - - name: Dependencies - id: depends - continue-on-error: true - run: | - brew update - - - name: Build - id: cmake_build - run: | - sysctl -a - cmake -B build -G Xcode \ - -DGGML_METAL_USE_BF16=ON \ - -DGGML_METAL_EMBED_LIBRARY=ON \ - -DLLAMA_BUILD_COMMON=OFF \ - -DLLAMA_BUILD_EXAMPLES=OFF \ - -DLLAMA_BUILD_TOOLS=OFF \ - -DLLAMA_BUILD_TESTS=OFF \ - -DLLAMA_BUILD_SERVER=OFF \ - -DCMAKE_SYSTEM_NAME=visionOS \ - -DCMAKE_OSX_DEPLOYMENT_TARGET=1.0 \ - -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml - cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO - - macOS-latest-swift: - runs-on: macos-latest - - strategy: - matrix: - destination: ['generic/platform=macOS', 'generic/platform=iOS', 'generic/platform=tvOS'] - - steps: - - name: Clone - id: checkout - uses: actions/checkout@v4 - - - name: ccache - uses: ggml-org/ccache-action@v1.2.16 - with: - key: macOS-latest-swift - evict-old-files: 1d - - - name: Dependencies - id: depends - continue-on-error: true - run: | - brew update - - - name: Build llama.cpp with CMake - id: 
cmake_build - run: | - sysctl -a - cmake -B build -G Xcode \ - -DGGML_METAL_USE_BF16=ON \ - -DGGML_METAL_EMBED_LIBRARY=ON \ - -DLLAMA_CURL=OFF \ - -DLLAMA_BUILD_EXAMPLES=OFF \ - -DLLAMA_BUILD_TOOLS=OFF \ - -DLLAMA_BUILD_TESTS=OFF \ - -DLLAMA_BUILD_SERVER=OFF \ - -DCMAKE_OSX_ARCHITECTURES="arm64;x86_64" - cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) - - - name: xcodebuild for swift package - id: xcodebuild - run: | - ./build-xcframework.sh - - windows-msys2: - runs-on: windows-2025 - - strategy: - fail-fast: false - matrix: - include: - - { sys: UCRT64, env: ucrt-x86_64, build: Release } - - { sys: CLANG64, env: clang-x86_64, build: Release } - - steps: - - name: Clone - uses: actions/checkout@v4 - - - name: ccache - uses: ggml-org/ccache-action@v1.2.16 - with: - key: windows-msys2 - variant: ccache - evict-old-files: 1d - - - name: Setup ${{ matrix.sys }} - uses: msys2/setup-msys2@v2 - with: - update: true - msystem: ${{matrix.sys}} - install: >- - base-devel - git - mingw-w64-${{matrix.env}}-toolchain - mingw-w64-${{matrix.env}}-cmake - mingw-w64-${{matrix.env}}-openblas - - - name: Build using CMake - shell: msys2 {0} - run: | - cmake -B build - cmake --build build --config ${{ matrix.build }} -j $(nproc) - - - name: Clean after building using CMake - shell: msys2 {0} - run: | - rm -rf build - - - name: Build using CMake w/ OpenBLAS - shell: msys2 {0} - run: | - cmake -B build -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS - cmake --build build --config ${{ matrix.build }} -j $(nproc) - - windows-latest-cmake: - runs-on: windows-2025 - - env: - OPENBLAS_VERSION: 0.3.23 - SDE_VERSION: 9.33.0-2024-01-07 - VULKAN_VERSION: 1.4.313.2 - - strategy: - matrix: - include: - - build: 'cpu-x64 (static)' - arch: 'x64' - defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/x64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DBUILD_SHARED_LIBS=OFF' - - build: 'openblas-x64' - arch: 'x64' - defines: '-G "Ninja 
Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/x64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DGGML_OPENMP=OFF -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS -DBLAS_INCLUDE_DIRS="$env:RUNNER_TEMP/openblas/include" -DBLAS_LIBRARIES="$env:RUNNER_TEMP/openblas/lib/openblas.lib"' - - build: 'vulkan-x64' - arch: 'x64' - defines: '-DCMAKE_BUILD_TYPE=Release -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DGGML_VULKAN=ON' - - build: 'llvm-arm64' - arch: 'arm64' - defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON' - - build: 'llvm-arm64-opencl-adreno' - arch: 'arm64' - defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/opencl-arm64-release" -DGGML_OPENCL=ON -DGGML_OPENCL_USE_ADRENO_KERNELS=ON' - - steps: - - name: Clone - id: checkout - uses: actions/checkout@v4 - - - name: ccache - uses: ggml-org/ccache-action@v1.2.16 - with: - key: windows-latest-cmake-${{ matrix.build }} - variant: ccache - evict-old-files: 1d - - - name: Download OpenBLAS - id: get_openblas - if: ${{ matrix.build == 'openblas-x64' }} - run: | - curl.exe -o $env:RUNNER_TEMP/openblas.zip -L "https://github.com/xianyi/OpenBLAS/releases/download/v${env:OPENBLAS_VERSION}/OpenBLAS-${env:OPENBLAS_VERSION}-x64.zip" - curl.exe -o $env:RUNNER_TEMP/OpenBLAS.LICENSE.txt -L "https://github.com/xianyi/OpenBLAS/raw/v${env:OPENBLAS_VERSION}/LICENSE" - mkdir $env:RUNNER_TEMP/openblas - tar.exe -xvf $env:RUNNER_TEMP/openblas.zip -C $env:RUNNER_TEMP/openblas - $vcdir = $(vswhere -latest -products * -requires Microsoft.VisualStudio.Component.VC.Tools.x86.x64 -property installationPath) - $msvc = $(join-path $vcdir $('VC\Tools\MSVC\'+$(gc -raw $(join-path $vcdir 'VC\Auxiliary\Build\Microsoft.VCToolsVersion.default.txt')).Trim())) - $lib = 
$(join-path $msvc 'bin\Hostx64\x64\lib.exe') - & $lib /machine:x64 "/def:${env:RUNNER_TEMP}/openblas/lib/libopenblas.def" "/out:${env:RUNNER_TEMP}/openblas/lib/openblas.lib" /name:openblas.dll - - - name: Install Vulkan SDK - id: get_vulkan - if: ${{ matrix.build == 'vulkan-x64' }} - run: | - curl.exe -o $env:RUNNER_TEMP/VulkanSDK-Installer.exe -L "https://sdk.lunarg.com/sdk/download/${env:VULKAN_VERSION}/windows/vulkansdk-windows-X64-${env:VULKAN_VERSION}.exe" - & "$env:RUNNER_TEMP\VulkanSDK-Installer.exe" --accept-licenses --default-answer --confirm-command install - Add-Content $env:GITHUB_ENV "VULKAN_SDK=C:\VulkanSDK\${env:VULKAN_VERSION}" - Add-Content $env:GITHUB_PATH "C:\VulkanSDK\${env:VULKAN_VERSION}\bin" - - - name: Install Ninja - id: install_ninja - run: | - choco install ninja - - - name: Install OpenCL Headers and Libs - id: install_opencl - if: ${{ matrix.build == 'llvm-arm64-opencl-adreno' }} - run: | - git clone https://github.com/KhronosGroup/OpenCL-Headers - cd OpenCL-Headers - cmake -B build ` - -DBUILD_TESTING=OFF ` - -DOPENCL_HEADERS_BUILD_TESTING=OFF ` - -DOPENCL_HEADERS_BUILD_CXX_TESTS=OFF ` - -DCMAKE_INSTALL_PREFIX="$env:RUNNER_TEMP/opencl-arm64-release" - cmake --build build --target install - git clone https://github.com/KhronosGroup/OpenCL-ICD-Loader - cd OpenCL-ICD-Loader - cmake -B build-arm64-release ` - -A arm64 ` - -DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/opencl-arm64-release" ` - -DCMAKE_INSTALL_PREFIX="$env:RUNNER_TEMP/opencl-arm64-release" - cmake --build build-arm64-release --target install --config release - - - name: libCURL - id: get_libcurl - uses: ./.github/actions/windows-setup-curl - with: - architecture: ${{ matrix.arch == 'x64' && 'win64' || 'win64a' }} - - - name: Build - id: cmake_build - env: - CURL_PATH: ${{ steps.get_libcurl.outputs.curl_path }} - run: | - cmake -S . 
-B build ${{ matrix.defines }} ` - -DCURL_LIBRARY="$env:CURL_PATH/lib/libcurl.dll.a" -DCURL_INCLUDE_DIR="$env:CURL_PATH/include" - cmake --build build --config Release -j ${env:NUMBER_OF_PROCESSORS} - cp $env:CURL_PATH/bin/libcurl-*.dll build/bin/Release - - - name: Add libopenblas.dll - id: add_libopenblas_dll - if: ${{ matrix.build == 'openblas-x64' }} - run: | - cp $env:RUNNER_TEMP/openblas/bin/libopenblas.dll ./build/bin/Release/openblas.dll - cp $env:RUNNER_TEMP/OpenBLAS.LICENSE.txt ./build/bin/Release/OpenBLAS-${env:OPENBLAS_VERSION}.txt - - - name: Test - id: cmake_test - if: ${{ matrix.arch == 'x64' }} - run: | - cd build - ctest -L main -C Release --verbose --timeout 900 - - # TODO: disabled for now, consider adding tests for all CPU variants instead - # - name: Test (Intel SDE) - # id: cmake_test_sde - # if: ${{ matrix.build == 'avx512-x64' && env.HAS_AVX512F == '0' }} # use Intel SDE for AVX-512 emulation - # run: | - # curl.exe -o $env:RUNNER_TEMP/sde.tar.xz -L "https://downloadmirror.intel.com/813591/sde-external-${env:SDE_VERSION}-win.tar.xz" - # # for some weird reason windows tar doesn't like sde tar.xz - # 7z x "-o${env:RUNNER_TEMP}" $env:RUNNER_TEMP/sde.tar.xz - # 7z x "-o${env:RUNNER_TEMP}" $env:RUNNER_TEMP/sde.tar - # $sde = $(join-path $env:RUNNER_TEMP sde-external-${env:SDE_VERSION}-win/sde.exe) - # cd build - # $env:LLAMA_SKIP_TESTS_SLOW_ON_EMULATOR = 1 - # & $sde -future -- ctest -L main -C Release --verbose --timeout 900 - - ubuntu-latest-cmake-cuda: - runs-on: ubuntu-latest - container: nvidia/cuda:12.6.2-devel-ubuntu24.04 - - steps: - - name: Clone - id: checkout - uses: actions/checkout@v4 - - - name: Install dependencies - env: - DEBIAN_FRONTEND: noninteractive - run: | - apt update - apt install -y cmake build-essential ninja-build libgomp1 git libcurl4-openssl-dev - - - name: ccache - uses: ggml-org/ccache-action@v1.2.16 - with: - key: ubuntu-latest-cmake-cuda - evict-old-files: 1d - - - name: Build with CMake - run: | - cmake -S . 
-B build -G Ninja \ - -DCMAKE_BUILD_TYPE=Release \ - -DCMAKE_CUDA_ARCHITECTURES=89-real \ - -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined \ - -DLLAMA_FATAL_WARNINGS=ON \ - -DGGML_NATIVE=OFF \ - -DGGML_CUDA=ON - cmake --build build - - windows-2022-cmake-cuda: - runs-on: windows-2022 - - strategy: - matrix: - cuda: ['12.4'] - - steps: - - name: Clone - id: checkout - uses: actions/checkout@v4 - - - name: Install ccache - uses: ggml-org/ccache-action@v1.2.16 - with: - key: windows-cuda-${{ matrix.cuda }} - variant: ccache - evict-old-files: 1d - - - name: Install Cuda Toolkit - uses: ./.github/actions/windows-setup-cuda - with: - cuda_version: ${{ matrix.cuda }} - - - name: Install Ninja - id: install_ninja - run: | - choco install ninja - - - name: libCURL - id: get_libcurl - uses: ./.github/actions/windows-setup-curl - - - name: Build - id: cmake_build - shell: cmd - env: - CURL_PATH: ${{ steps.get_libcurl.outputs.curl_path }} - run: | - call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" x64 - cmake -S . 
-B build -G "Ninja Multi-Config" ^ - -DLLAMA_BUILD_SERVER=ON ^ - -DGGML_NATIVE=OFF ^ - -DGGML_BACKEND_DL=ON ^ - -DGGML_CPU_ALL_VARIANTS=ON ^ - -DGGML_CUDA=ON ^ - -DGGML_RPC=ON ^ - -DCURL_LIBRARY="%CURL_PATH%/lib/libcurl.dll.a" -DCURL_INCLUDE_DIR="%CURL_PATH%/include" - set /A NINJA_JOBS=%NUMBER_OF_PROCESSORS%-1 - cmake --build build --config Release -j %NINJA_JOBS% -t ggml - cmake --build build --config Release - - windows-latest-cmake-sycl: - runs-on: windows-2022 - - defaults: - run: - shell: bash - - env: - WINDOWS_BASEKIT_URL: https://registrationcenter-download.intel.com/akdlm/IRC_NAS/7cd9bba0-7aab-4e30-b3ae-2221006a4a05/intel-oneapi-base-toolkit-2025.1.1.34_offline.exe - WINDOWS_DPCPP_MKL: intel.oneapi.win.cpp-dpcpp-common:intel.oneapi.win.mkl.devel:intel.oneapi.win.dnnl:intel.oneapi.win.tbb.devel - ONEAPI_ROOT: "C:/Program Files (x86)/Intel/oneAPI" - steps: - - name: Clone - id: checkout - uses: actions/checkout@v4 - - - name: ccache - uses: ggml-org/ccache-action@v1.2.16 - with: - key: windows-latest-cmake-sycl - variant: ccache - evict-old-files: 1d - - - name: Install - run: | - scripts/install-oneapi.bat $WINDOWS_BASEKIT_URL $WINDOWS_DPCPP_MKL - - # TODO: add libcurl support ; we will also need to modify win-build-sycl.bat to accept user-specified args - - - name: Build - id: cmake_build - run: examples/sycl/win-build-sycl.bat - - windows-latest-cmake-hip: - if: ${{ github.event.inputs.create_release != 'true' }} - runs-on: windows-2022 - - steps: - - name: Clone - id: checkout - uses: actions/checkout@v4 - - - name: Clone rocWMMA repository - id: clone_rocwmma - run: | - git clone https://github.com/rocm/rocwmma --branch rocm-6.2.4 --depth 1 - - - name: Install - id: depends - run: | - $ErrorActionPreference = "Stop" - write-host "Downloading AMD HIP SDK Installer" - Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q3-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe" - write-host 
"Installing AMD HIP SDK" - $proc = Start-Process "${env:RUNNER_TEMP}\rocm-install.exe" -ArgumentList '-install' -NoNewWindow -PassThru - $proc.WaitForExit(600000) - write-host "Completed AMD HIP SDK installation" - - - name: Verify ROCm - id: verify - run: | - & 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' --version - - - name: Install ccache - uses: ggml-org/ccache-action@v1.2.16 - with: - key: ${{ github.job }} - evict-old-files: 1d - - - name: libCURL - id: get_libcurl - uses: ./.github/actions/windows-setup-curl - - - name: Build - id: cmake_build - env: - CURL_PATH: ${{ steps.get_libcurl.outputs.curl_path }} - run: | - $env:HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path) - $env:CMAKE_PREFIX_PATH="${env:HIP_PATH}" - cmake -G "Unix Makefiles" -B build -S . ` - -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" ` - -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" ` - -DCMAKE_CXX_FLAGS="-I$($PWD.Path.Replace('\', '/'))/rocwmma/library/include/" ` - -DCMAKE_BUILD_TYPE=Release ` - -DGGML_HIP=ON ` - -DGGML_HIP_ROCWMMA_FATTN=ON ` - -DGGML_RPC=ON ` - -DCURL_LIBRARY="$env:CURL_PATH/lib/libcurl.dll.a" -DCURL_INCLUDE_DIR="$env:CURL_PATH/include" - cmake --build build -j ${env:NUMBER_OF_PROCESSORS} - - ios-xcode-build: - runs-on: macos-latest - - steps: - - name: Checkout code - uses: actions/checkout@v4 - - - name: Setup Xcode - uses: maxim-lobanov/setup-xcode@v1 - with: - xcode-version: latest-stable - - - name: Build - id: cmake_build - run: | - sysctl -a - cmake -B build -G Xcode \ - -DGGML_METAL_USE_BF16=ON \ - -DGGML_METAL_EMBED_LIBRARY=ON \ - -DLLAMA_CURL=OFF \ - -DLLAMA_BUILD_EXAMPLES=OFF \ - -DLLAMA_BUILD_TOOLS=OFF \ - -DLLAMA_BUILD_TESTS=OFF \ - -DLLAMA_BUILD_SERVER=OFF \ - -DCMAKE_SYSTEM_NAME=iOS \ - -DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \ - -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml - cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO - - - name: xcodebuild for swift 
package - id: xcodebuild - run: | - ./build-xcframework.sh - - - name: Build Xcode project - run: xcodebuild -project examples/llama.swiftui/llama.swiftui.xcodeproj -scheme llama.swiftui -sdk iphoneos CODE_SIGNING_REQUIRED=NO CODE_SIGN_IDENTITY= -destination 'generic/platform=iOS' FRAMEWORK_FOLDER_PATH=./build-ios build - - android-build: - runs-on: ubuntu-latest - - steps: - - name: Clone - uses: actions/checkout@v4 - - - name: ccache - uses: ggml-org/ccache-action@v1.2.16 - with: - key: android-build - evict-old-files: 1d - - - name: Set up JDK - uses: actions/setup-java@v3 - with: - java-version: 17 - distribution: zulu - - - name: Setup Android SDK - uses: android-actions/setup-android@v3 - with: - log-accepted-android-sdk-licenses: false - - - name: Build - run: | - cd examples/llama.android - ./gradlew build --no-daemon - - openEuler-latest-cmake-cann: - if: ${{ github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'Ascend NPU') }} - defaults: - run: - shell: bash -el {0} - strategy: - matrix: - arch: [x86, aarch64] - cann: - - '8.1.RC1.alpha001-910b-openeuler22.03-py3.10' - device: - - 'ascend910b3' - build: - - 'Release' - runs-on: ${{ matrix.arch == 'aarch64' && 'ubuntu-24.04-arm' || 'ubuntu-24.04' }} - container: ascendai/cann:${{ matrix.cann }} - steps: - - name: Checkout - uses: actions/checkout@v4 - - - name: Dependencies - run: | - yum update -y - yum install -y git gcc gcc-c++ make cmake libcurl-devel - - - name: Build - run: | - export LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${ASCEND_TOOLKIT_HOME}/$(uname -m)-linux/devlib/:${LD_LIBRARY_PATH} - - cmake -S . 
-B build \ - -DCMAKE_BUILD_TYPE=${{ matrix.build }} \ - -DGGML_CANN=on \ - -DSOC_TYPE=${{ matrix.device }} - cmake --build build -j $(nproc) diff --git a/.github/workflows/close-issue.yml b/.github/workflows/close-issue.yml deleted file mode 100644 index 19e7854745d69..0000000000000 --- a/.github/workflows/close-issue.yml +++ /dev/null @@ -1,28 +0,0 @@ -name: Close inactive issues -on: - schedule: - - cron: "42 0 * * *" - -# Fine-grant permission -# https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token -permissions: - issues: write - -jobs: - close-issues: - runs-on: ubuntu-latest - permissions: - issues: write - pull-requests: write - steps: - - uses: actions/stale@v5 - with: - exempt-issue-labels: "refactoring,help wanted,good first issue,research,bug,roadmap" - days-before-issue-stale: 30 - days-before-issue-close: 14 - stale-issue-label: "stale" - close-issue-message: "This issue was closed because it has been inactive for 14 days since being marked as stale." - days-before-pr-stale: -1 - days-before-pr-close: -1 - operations-per-run: 10000 - repo-token: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/copilot-setup-steps.yml b/.github/workflows/copilot-setup-steps.yml deleted file mode 100644 index 3645e30378b95..0000000000000 --- a/.github/workflows/copilot-setup-steps.yml +++ /dev/null @@ -1,57 +0,0 @@ -name: "Copilot Setup Steps" - -# Automatically run the setup steps when they are changed to allow for easy validation, and -# allow manual testing through the repository's "Actions" tab -on: - workflow_dispatch: - push: - paths: - - .github/workflows/copilot-setup-steps.yml - pull_request: - paths: - - .github/workflows/copilot-setup-steps.yml - -jobs: - # The job MUST be called `copilot-setup-steps` or it will not be picked up by Copilot. 
- copilot-setup-steps: - runs-on: ubuntu-latest - - # Set the permissions to the lowest permissions possible needed for your steps. - # Copilot will be given its own token for its operations. - permissions: - # If you want to clone the repository as part of your setup steps, for example to install dependencies, you'll need the `contents: read` permission. If you don't clone the repository in your setup steps, Copilot will do this for you automatically after the steps complete. - contents: read - - # You can define any steps you want, and they will run before the agent starts. - # If you do not check out your code, Copilot will do this for you. - steps: - - name: Checkout code - uses: actions/checkout@v4 - - - name: ccache - uses: ggml-org/ccache-action@v1.2.16 - with: - key: copilot-setup-steps - evict-old-files: 1d - - - name: Dependencies - id: depends - run: | - sudo apt-get update - sudo apt-get install build-essential libcurl4-openssl-dev - # Install git-clang-format script for formatting only changed code - wget -O /tmp/git-clang-format https://raw.githubusercontent.com/llvm/llvm-project/release/18.x/clang/tools/clang-format/git-clang-format - sudo cp /tmp/git-clang-format /usr/local/bin/git-clang-format - sudo chmod +x /usr/local/bin/git-clang-format - - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: '3.11' - - - name: Install Python dependencies - run: | - python3 -m venv .venv - .venv/bin/activate - pip install -r requirements/requirements-all.txt -r tools/server/tests/requirements.txt - pip install flake8 pyright pre-commit diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml deleted file mode 100644 index 2067927be56ca..0000000000000 --- a/.github/workflows/docker.yml +++ /dev/null @@ -1,178 +0,0 @@ -# This workflow uses actions that are not certified by GitHub. -# They are provided by a third-party and are governed by -# separate terms of service, privacy policy, and support -# documentation. 
- -# GitHub recommends pinning actions to a commit SHA. -# To get a newer version, you will need to update the SHA. -# You can also reference a tag or branch, but the action may change without warning. - -name: Publish Docker image - -on: - workflow_dispatch: # allows manual triggering - schedule: - # Rebuild daily rather than on every push because it is expensive - - cron: '12 4 * * *' - -concurrency: - group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }} - cancel-in-progress: true - -# Fine-grant permission -# https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token -permissions: - packages: write - -jobs: - push_to_registry: - name: Push Docker image to Docker Hub - - runs-on: ubuntu-22.04 - env: - COMMIT_SHA: ${{ github.sha }} - strategy: - fail-fast: false - matrix: - config: - # Multi-stage build - # Note: the arm64 images are failing, which prevents the amd64 images from being built - # https://github.com/ggml-org/llama.cpp/issues/11888 - #- { tag: "cpu", dockerfile: ".devops/cpu.Dockerfile", platforms: "linux/amd64,linux/arm64", full: true, light: true, server: true, free_disk_space: false } - - { tag: "cpu", dockerfile: ".devops/cpu.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: false } - - { tag: "cuda", dockerfile: ".devops/cuda.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: false } - - { tag: "musa", dockerfile: ".devops/musa.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: true } - - { tag: "intel", dockerfile: ".devops/intel.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: true } - - { tag: "vulkan", dockerfile: ".devops/vulkan.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: false } - # Note: 
the rocm images are failing due to a compiler error and are disabled until this is fixed to allow the workflow to complete - #- {tag: "rocm", dockerfile: ".devops/rocm.Dockerfile", platforms: "linux/amd64,linux/arm64", full: true, light: true, server: true, free_disk_space: true } - steps: - - name: Check out the repo - uses: actions/checkout@v4 - with: - fetch-depth: 0 # preserve git history, so we can determine the build number - - - name: Set up QEMU - uses: docker/setup-qemu-action@v3 - with: - image: tonistiigi/binfmt:qemu-v7.0.0-28 - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 - - - name: Log in to Docker Hub - uses: docker/login-action@v2 - with: - registry: ghcr.io - username: ${{ github.repository_owner }} - password: ${{ secrets.GITHUB_TOKEN }} - - - name: Determine tag name - id: tag - shell: bash - run: | - BUILD_NUMBER="$(git rev-list --count HEAD)" - SHORT_HASH="$(git rev-parse --short=7 HEAD)" - REPO_OWNER="${GITHUB_REPOSITORY_OWNER@L}" # to lower case - REPO_NAME="${{ github.event.repository.name }}" - - # determine tag name postfix (build number, commit hash) - if [[ "${{ env.GITHUB_BRANCH_NAME }}" == "master" ]]; then - TAG_POSTFIX="-b${BUILD_NUMBER}" - else - SAFE_NAME=$(echo "${{ env.GITHUB_BRANCH_NAME }}" | tr '/' '-') - TAG_POSTFIX="-${SAFE_NAME}-${SHORT_HASH}" - fi - # list all tags possible - if [[ "${{ matrix.config.tag }}" == "cpu" ]]; then - TYPE="" - else - TYPE="-${{ matrix.config.tag }}" - fi - PREFIX="ghcr.io/${REPO_OWNER}/${REPO_NAME}:" - FULLTAGS="${PREFIX}full${TYPE},${PREFIX}full${TYPE}${TAG_POSTFIX}" - LIGHTTAGS="${PREFIX}light${TYPE},${PREFIX}light${TYPE}${TAG_POSTFIX}" - SERVERTAGS="${PREFIX}server${TYPE},${PREFIX}server${TYPE}${TAG_POSTFIX}" - echo "full_output_tags=$FULLTAGS" >> $GITHUB_OUTPUT - echo "light_output_tags=$LIGHTTAGS" >> $GITHUB_OUTPUT - echo "server_output_tags=$SERVERTAGS" >> $GITHUB_OUTPUT - echo "full_output_tags=$FULLTAGS" # print out for debugging - echo 
"light_output_tags=$LIGHTTAGS" # print out for debugging - echo "server_output_tags=$SERVERTAGS" # print out for debugging - env: - GITHUB_BRANCH_NAME: ${{ github.head_ref || github.ref_name }} - GITHUB_REPOSITORY_OWNER: '${{ github.repository_owner }}' - - - name: Free Disk Space (Ubuntu) - if: ${{ matrix.config.free_disk_space == true }} - uses: ggml-org/free-disk-space@v1.3.1 - with: - # this might remove tools that are actually needed, - # if set to "true" but frees about 6 GB - tool-cache: false - - # all of these default to true, but feel free to set to - # "false" if necessary for your workflow - android: true - dotnet: true - haskell: true - large-packages: true - docker-images: true - swap-storage: true - - - name: Build and push Full Docker image (tagged + versioned) - if: ${{ (github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') && matrix.config.full == true }} - uses: docker/build-push-action@v6 - with: - context: . - push: true - platforms: ${{ matrix.config.platforms }} - # tag list is generated from step above - tags: ${{ steps.tag.outputs.full_output_tags }} - file: ${{ matrix.config.dockerfile }} - target: full - provenance: false - # using github experimental cache - cache-from: type=gha - cache-to: type=gha,mode=max - # return to this if the experimental github cache is having issues - #cache-to: type=local,dest=/tmp/.buildx-cache - #cache-from: type=local,src=/tmp/.buildx-cache - - - name: Build and push Light Docker image (tagged + versioned) - if: ${{ (github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') && matrix.config.light == true }} - uses: docker/build-push-action@v6 - with: - context: . 
- push: true - platforms: ${{ matrix.config.platforms }} - # tag list is generated from step above - tags: ${{ steps.tag.outputs.light_output_tags }} - file: ${{ matrix.config.dockerfile }} - target: light - provenance: false - # using github experimental cache - cache-from: type=gha - cache-to: type=gha,mode=max - # return to this if the experimental github cache is having issues - #cache-to: type=local,dest=/tmp/.buildx-cache - #cache-from: type=local,src=/tmp/.buildx-cache - - - name: Build and push Server Docker image (tagged + versioned) - if: ${{ (github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') && matrix.config.server == true }} - uses: docker/build-push-action@v6 - with: - context: . - push: true - platforms: ${{ matrix.config.platforms }} - # tag list is generated from step above - tags: ${{ steps.tag.outputs.server_output_tags }} - file: ${{ matrix.config.dockerfile }} - target: server - provenance: false - # using github experimental cache - cache-from: type=gha - cache-to: type=gha,mode=max - # return to this if the experimental github cache is having issues - #cache-to: type=local,dest=/tmp/.buildx-cache - #cache-from: type=local,src=/tmp/.buildx-cache diff --git a/.github/workflows/editorconfig.yml b/.github/workflows/editorconfig.yml deleted file mode 100644 index f02b7c2194bcf..0000000000000 --- a/.github/workflows/editorconfig.yml +++ /dev/null @@ -1,29 +0,0 @@ -name: EditorConfig Checker - -on: - workflow_dispatch: # allows manual triggering - inputs: - create_release: - description: 'Create new release' - required: true - type: boolean - push: - branches: - - master - pull_request: - branches: - - master - -concurrency: - group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }} - cancel-in-progress: true - -jobs: - editorconfig: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - uses: editorconfig-checker/action-editorconfig-checker@v2 - with: - 
version: v3.0.3 - - run: editorconfig-checker diff --git a/.github/workflows/gguf-publish.yml b/.github/workflows/gguf-publish.yml deleted file mode 100644 index 3ca4d30581074..0000000000000 --- a/.github/workflows/gguf-publish.yml +++ /dev/null @@ -1,44 +0,0 @@ -# This workflow will upload a Python Package using Twine when a GGUF release is created -# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries - -# See `gguf-py/README.md` for how to make a release. - -# This workflow uses actions that are not certified by GitHub. -# They are provided by a third-party and are governed by -# separate terms of service, privacy policy, and support -# documentation. - -name: Upload Python Package - -on: - workflow_dispatch: - push: - # Pattern matched against refs/tags - tags: - - 'gguf-v*' # Push events to every version tag - - -jobs: - deploy: - - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v4 - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: '3.9.x' - - name: Install dependencies - run: | - cd gguf-py - python -m pip install poetry - poetry install - - - name: Build package - run: cd gguf-py && poetry build - - name: Publish package - uses: pypa/gh-action-pypi-publish@release/v1 - with: - password: ${{ secrets.PYPI_API_TOKEN }} - packages-dir: gguf-py/dist diff --git a/.github/workflows/labeler.yml b/.github/workflows/labeler.yml deleted file mode 100644 index 0b0f300aa402a..0000000000000 --- a/.github/workflows/labeler.yml +++ /dev/null @@ -1,17 +0,0 @@ -name: "Pull Request Labeler" -on: -- pull_request_target - -jobs: - labeler: - permissions: - contents: read - pull-requests: write - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - with: - repository: "ggml-org/llama.cpp" - - uses: actions/labeler@v5 - with: - configuration-path: '.github/labeler.yml' diff --git 
a/.github/workflows/pre-tokenizer-hashes.yml b/.github/workflows/pre-tokenizer-hashes.yml deleted file mode 100644 index dff998e239319..0000000000000 --- a/.github/workflows/pre-tokenizer-hashes.yml +++ /dev/null @@ -1,45 +0,0 @@ -name: Check Pre-Tokenizer Hashes - -on: - push: - paths: - - 'convert_hf_to_gguf.py' - - 'convert_hf_to_gguf_update.py' - pull_request: - paths: - - 'convert_hf_to_gguf.py' - - 'convert_hf_to_gguf_update.py' - -jobs: - pre-tokenizer-hashes: - runs-on: ubuntu-latest - - steps: - - name: Checkout repository - uses: actions/checkout@v4 - - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: '3.11' - - - name: Install Python dependencies - run: | - python3 -m venv .venv - .venv/bin/pip install -r requirements/requirements-convert_hf_to_gguf_update.txt - - - name: Update pre-tokenizer hashes - run: | - cp convert_hf_to_gguf.py /tmp - .venv/bin/python convert_hf_to_gguf_update.py --check-missing - - - name: Check if committed pre-tokenizer hashes matches generated version - run: | - if ! diff -q convert_hf_to_gguf.py /tmp/convert_hf_to_gguf.py; then - echo "Model pre-tokenizer hashes (in convert_hf_to_gguf.py) do not match generated hashes (from convert_hf_to_gguf_update.py)." - echo "To fix: run ./convert_hf_to_gguf_update.py and commit the updated convert_hf_to_gguf.py along with your changes" - echo "Differences found:" - diff convert_hf_to_gguf.py /tmp/convert_hf_to_gguf.py || true - exit 1 - fi - echo "Model pre-tokenizer hashes are up to date." 
diff --git a/.github/workflows/python-check-requirements.yml b/.github/workflows/python-check-requirements.yml deleted file mode 100644 index 46e80aecd0a0c..0000000000000 --- a/.github/workflows/python-check-requirements.yml +++ /dev/null @@ -1,33 +0,0 @@ -name: Python check requirements.txt - -on: - push: - paths: - - '.github/workflows/python-check-requirements.yml' - - 'scripts/check-requirements.sh' - - 'convert*.py' - - '**/requirements*.txt' - pull_request: - paths: - - '.github/workflows/python-check-requirements.yml' - - 'scripts/check-requirements.sh' - - 'convert*.py' - - '**/requirements*.txt' - -concurrency: - group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }} - cancel-in-progress: true - -jobs: - python-check-requirements: - runs-on: ubuntu-latest - name: check-requirements - steps: - - name: Check out source repository - uses: actions/checkout@v4 - - name: Set up Python environment - uses: actions/setup-python@v5 - with: - python-version: "3.11" - - name: Run check-requirements.sh script - run: bash scripts/check-requirements.sh diff --git a/.github/workflows/python-lint.yml b/.github/workflows/python-lint.yml deleted file mode 100644 index ddfdf73b8fce2..0000000000000 --- a/.github/workflows/python-lint.yml +++ /dev/null @@ -1,30 +0,0 @@ -name: flake8 Lint - -on: - push: - branches: - - master - paths: ['.github/workflows/python-lint.yml', '**/*.py'] - pull_request: - types: [opened, synchronize, reopened] - paths: ['.github/workflows/python-lint.yml', '**/*.py'] - -concurrency: - group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }} - cancel-in-progress: true - -jobs: - flake8-lint: - runs-on: ubuntu-latest - name: Lint - steps: - - name: Check out source repository - uses: actions/checkout@v4 - - name: Set up Python environment - uses: actions/setup-python@v5 - with: - python-version: "3.11" - - name: flake8 Lint - uses: py-actions/flake8@v2 - with: - plugins: "flake8-no-print" diff 
--git a/.github/workflows/python-type-check.yml b/.github/workflows/python-type-check.yml deleted file mode 100644 index 373bb601020b2..0000000000000 --- a/.github/workflows/python-type-check.yml +++ /dev/null @@ -1,40 +0,0 @@ -name: Python Type-Check - -on: - push: - paths: - - '.github/workflows/python-type-check.yml' - - 'pyrightconfig.json' - - '**.py' - - '**/requirements*.txt' - pull_request: - paths: - - '.github/workflows/python-type-check.yml' - - 'pyrightconfig.json' - - '**.py' - - '**/requirements*.txt' - -concurrency: - group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }} - cancel-in-progress: true - -jobs: - python-type-check: - runs-on: ubuntu-latest - name: pyright type-check - steps: - - name: Check out source repository - uses: actions/checkout@v4 - - name: Set up Python environment - uses: actions/setup-python@v5 - with: - python-version: "3.11" - - name: Install Python dependencies - # TODO: use a venv - run: pip install -r requirements/requirements-all.txt - - name: Type-check with Pyright - uses: jakebailey/pyright-action@v2 - with: - version: 1.1.382 - level: warning - warnings: true diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml deleted file mode 100644 index 5367637e42843..0000000000000 --- a/.github/workflows/release.yml +++ /dev/null @@ -1,760 +0,0 @@ -name: Release - -on: - workflow_dispatch: # allows manual triggering - inputs: - create_release: - description: 'Create new release' - required: true - type: boolean - push: - branches: - - master - paths: ['.github/workflows/release.yml', '**/CMakeLists.txt', '**/.cmake', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.cuh', '**/*.swift', '**/*.m', '**/*.metal', '**/*.comp'] - -concurrency: - group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }} - cancel-in-progress: true - -env: - BRANCH_NAME: ${{ github.head_ref || github.ref_name }} - CMAKE_ARGS: "-DLLAMA_BUILD_EXAMPLES=OFF 
-DLLAMA_BUILD_TESTS=OFF -DLLAMA_BUILD_TOOLS=ON -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON" - -jobs: - macOS-arm64: - runs-on: macos-14 - - steps: - - name: Clone - id: checkout - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - - name: ccache - uses: ggml-org/ccache-action@v1.2.16 - with: - key: macOS-latest-cmake-arm64 - evict-old-files: 1d - - - name: Dependencies - id: depends - continue-on-error: true - run: | - brew update - brew install curl - - - name: Build - id: cmake_build - run: | - sysctl -a - cmake -B build \ - -DCMAKE_INSTALL_RPATH='@loader_path' \ - -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON \ - -DLLAMA_FATAL_WARNINGS=ON \ - -DGGML_METAL_USE_BF16=ON \ - -DGGML_METAL_EMBED_LIBRARY=ON \ - -DGGML_RPC=ON \ - ${{ env.CMAKE_ARGS }} - cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) - - - name: Determine tag name - id: tag - uses: ./.github/actions/get-tag-name - - - name: Pack artifacts - id: pack_artifacts - run: | - cp LICENSE ./build/bin/ - zip -r llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.zip ./build/bin/* - - - name: Upload artifacts - uses: actions/upload-artifact@v4 - with: - path: llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.zip - name: llama-bin-macos-arm64.zip - - macOS-x64: - runs-on: macos-13 - - steps: - - name: Clone - id: checkout - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - - name: ccache - uses: ggml-org/ccache-action@v1.2.16 - with: - key: macOS-latest-cmake-x64 - evict-old-files: 1d - - - name: Dependencies - id: depends - continue-on-error: true - run: | - brew update - brew install curl - - - name: Build - id: cmake_build - run: | - sysctl -a - # Metal is disabled due to intermittent failures with Github runners not having a GPU: - # https://github.com/ggml-org/llama.cpp/actions/runs/8635935781/job/23674807267#step:5:2313 - cmake -B build \ - -DCMAKE_INSTALL_RPATH='@loader_path' \ - -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON \ - -DLLAMA_FATAL_WARNINGS=ON \ - -DGGML_METAL=OFF \ - -DGGML_RPC=ON - cmake 
--build build --config Release -j $(sysctl -n hw.logicalcpu) - - - name: Determine tag name - id: tag - uses: ./.github/actions/get-tag-name - - - name: Pack artifacts - id: pack_artifacts - run: | - cp LICENSE ./build/bin/ - zip -r llama-${{ steps.tag.outputs.name }}-bin-macos-x64.zip ./build/bin/* - - - name: Upload artifacts - uses: actions/upload-artifact@v4 - with: - path: llama-${{ steps.tag.outputs.name }}-bin-macos-x64.zip - name: llama-bin-macos-x64.zip - - ubuntu-22-cpu: - strategy: - matrix: - include: - - build: 'x64' - os: ubuntu-22.04 - # GGML_BACKEND_DL and GGML_CPU_ALL_VARIANTS are not currently supported on arm - # - build: 'arm64' - # os: ubuntu-22.04-arm - - runs-on: ${{ matrix.os }} - - steps: - - name: Clone - id: checkout - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - - name: ccache - uses: ggml-org/ccache-action@v1.2.16 - with: - key: ubuntu-cpu-cmake - evict-old-files: 1d - - - name: Dependencies - id: depends - run: | - sudo apt-get update - sudo apt-get install build-essential libcurl4-openssl-dev - - - name: Build - id: cmake_build - run: | - cmake -B build \ - -DCMAKE_INSTALL_RPATH='$ORIGIN' \ - -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON \ - -DGGML_BACKEND_DL=ON \ - -DGGML_NATIVE=OFF \ - -DGGML_CPU_ALL_VARIANTS=ON \ - -DLLAMA_FATAL_WARNINGS=ON \ - ${{ env.CMAKE_ARGS }} - cmake --build build --config Release -j $(nproc) - - - name: Determine tag name - id: tag - uses: ./.github/actions/get-tag-name - - - name: Pack artifacts - id: pack_artifacts - run: | - cp LICENSE ./build/bin/ - zip -r llama-${{ steps.tag.outputs.name }}-bin-ubuntu-${{ matrix.build }}.zip ./build/bin/* - - - name: Upload artifacts - uses: actions/upload-artifact@v4 - with: - path: llama-${{ steps.tag.outputs.name }}-bin-ubuntu-${{ matrix.build }}.zip - name: llama-bin-ubuntu-${{ matrix.build }}.zip - - ubuntu-22-vulkan: - runs-on: ubuntu-22.04 - - steps: - - name: Clone - id: checkout - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - - name: ccache - uses: 
ggml-org/ccache-action@v1.2.16 - with: - key: ubuntu-22-cmake-vulkan - evict-old-files: 1d - - - name: Dependencies - id: depends - run: | - wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo apt-key add - - sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list - sudo apt-get update -y - sudo apt-get install -y build-essential mesa-vulkan-drivers vulkan-sdk libcurl4-openssl-dev - - - name: Build - id: cmake_build - run: | - cmake -B build \ - -DCMAKE_INSTALL_RPATH='$ORIGIN' \ - -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON \ - -DGGML_BACKEND_DL=ON \ - -DGGML_NATIVE=OFF \ - -DGGML_CPU_ALL_VARIANTS=ON \ - -DGGML_VULKAN=ON \ - ${{ env.CMAKE_ARGS }} - cmake --build build --config Release -j $(nproc) - - - name: Determine tag name - id: tag - uses: ./.github/actions/get-tag-name - - - name: Pack artifacts - id: pack_artifacts - run: | - cp LICENSE ./build/bin/ - zip -r llama-${{ steps.tag.outputs.name }}-bin-ubuntu-vulkan-x64.zip ./build/bin/* - - - name: Upload artifacts - uses: actions/upload-artifact@v4 - with: - path: llama-${{ steps.tag.outputs.name }}-bin-ubuntu-vulkan-x64.zip - name: llama-bin-ubuntu-vulkan-x64.zip - - windows-cpu: - runs-on: windows-2025 - - strategy: - matrix: - include: - - arch: 'x64' - - arch: 'arm64' - - steps: - - name: Clone - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - - name: ccache - uses: ggml-org/ccache-action@v1.2.16 - with: - key: windows-latest-cmake-cpu-${{ matrix.arch }} - variant: ccache - evict-old-files: 1d - - - name: Install Ninja - run: | - choco install ninja - - - name: libCURL - id: get_libcurl - uses: ./.github/actions/windows-setup-curl - with: - architecture: ${{ matrix.arch == 'x64' && 'win64' || 'win64a' }} - - - name: Build - shell: cmd - env: - CURL_PATH: ${{ steps.get_libcurl.outputs.curl_path }} - run: | - call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" ${{ matrix.arch == 
'x64' && 'x64' || 'amd64_arm64' }} - cmake -S . -B build -G "Ninja Multi-Config" ^ - -D CMAKE_TOOLCHAIN_FILE=cmake/${{ matrix.arch }}-windows-llvm.cmake ^ - -DGGML_NATIVE=OFF ^ - -DGGML_BACKEND_DL=ON ^ - -DGGML_CPU_ALL_VARIANTS=${{ matrix.arch == 'x64' && 'ON' || 'OFF' }} ^ - -DGGML_OPENMP=ON ^ - -DCURL_LIBRARY="%CURL_PATH%/lib/libcurl.dll.a" -DCURL_INCLUDE_DIR="%CURL_PATH%/include" ^ - ${{ env.CMAKE_ARGS }} - cmake --build build --config Release - - - name: Pack artifacts - id: pack_artifacts - env: - CURL_PATH: ${{ steps.get_libcurl.outputs.curl_path }} - run: | - Copy-Item $env:CURL_PATH\bin\libcurl-${{ matrix.arch }}.dll .\build\bin\Release\ - Copy-Item "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Redist\MSVC\14.44.35112\debug_nonredist\${{ matrix.arch }}\Microsoft.VC143.OpenMP.LLVM\libomp140.${{ matrix.arch == 'x64' && 'x86_64' || 'aarch64' }}.dll" .\build\bin\Release\ - 7z a llama-bin-win-cpu-${{ matrix.arch }}.zip .\build\bin\Release\* - - - name: Upload artifacts - uses: actions/upload-artifact@v4 - with: - path: llama-bin-win-cpu-${{ matrix.arch }}.zip - name: llama-bin-win-cpu-${{ matrix.arch }}.zip - - windows: - runs-on: windows-2025 - - env: - OPENBLAS_VERSION: 0.3.23 - VULKAN_VERSION: 1.4.313.2 - - strategy: - matrix: - include: - - backend: 'vulkan' - arch: 'x64' - defines: '-DGGML_VULKAN=ON' - target: 'ggml-vulkan' - - backend: 'opencl-adreno' - arch: 'arm64' - defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/opencl-arm64-release" -DGGML_OPENCL=ON -DGGML_OPENCL_USE_ADRENO_KERNELS=ON' - target: 'ggml-opencl' - - steps: - - name: Clone - id: checkout - uses: actions/checkout@v4 - - - name: ccache - uses: ggml-org/ccache-action@v1.2.16 - with: - key: windows-latest-cmake-${{ matrix.backend }}-${{ matrix.arch }} - variant: ccache - evict-old-files: 1d - - - name: Install Vulkan SDK - id: get_vulkan - if: ${{ matrix.backend == 'vulkan' }} - run: | - curl.exe -o 
$env:RUNNER_TEMP/VulkanSDK-Installer.exe -L "https://sdk.lunarg.com/sdk/download/${env:VULKAN_VERSION}/windows/vulkansdk-windows-X64-${env:VULKAN_VERSION}.exe" - & "$env:RUNNER_TEMP\VulkanSDK-Installer.exe" --accept-licenses --default-answer --confirm-command install - Add-Content $env:GITHUB_ENV "VULKAN_SDK=C:\VulkanSDK\${env:VULKAN_VERSION}" - Add-Content $env:GITHUB_PATH "C:\VulkanSDK\${env:VULKAN_VERSION}\bin" - - - name: Install Ninja - id: install_ninja - run: | - choco install ninja - - - name: Install OpenCL Headers and Libs - id: install_opencl - if: ${{ matrix.backend == 'opencl-adreno' && matrix.arch == 'arm64' }} - run: | - git clone https://github.com/KhronosGroup/OpenCL-Headers - cd OpenCL-Headers - cmake -B build ` - -DBUILD_TESTING=OFF ` - -DOPENCL_HEADERS_BUILD_TESTING=OFF ` - -DOPENCL_HEADERS_BUILD_CXX_TESTS=OFF ` - -DCMAKE_INSTALL_PREFIX="$env:RUNNER_TEMP/opencl-arm64-release" - cmake --build build --target install - git clone https://github.com/KhronosGroup/OpenCL-ICD-Loader - cd OpenCL-ICD-Loader - cmake -B build-arm64-release ` - -A arm64 ` - -DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/opencl-arm64-release" ` - -DCMAKE_INSTALL_PREFIX="$env:RUNNER_TEMP/opencl-arm64-release" - cmake --build build-arm64-release --target install --config release - - - name: Build - id: cmake_build - run: | - cmake -S . 
-B build ${{ matrix.defines }} -DGGML_NATIVE=OFF -DGGML_CPU=OFF -DGGML_BACKEND_DL=ON -DLLAMA_CURL=OFF - cmake --build build --config Release --target ${{ matrix.target }} - - - name: Pack artifacts - id: pack_artifacts - run: | - 7z a llama-bin-win-${{ matrix.backend }}-${{ matrix.arch }}.zip .\build\bin\Release\${{ matrix.target }}.dll - - - name: Upload artifacts - uses: actions/upload-artifact@v4 - with: - path: llama-bin-win-${{ matrix.backend }}-${{ matrix.arch }}.zip - name: llama-bin-win-${{ matrix.backend }}-${{ matrix.arch }}.zip - - windows-cuda: - runs-on: windows-2022 - - strategy: - matrix: - cuda: ['12.4'] - - steps: - - name: Clone - id: checkout - uses: actions/checkout@v4 - - - name: Install ccache - uses: ggml-org/ccache-action@v1.2.16 - with: - key: windows-cuda-${{ matrix.cuda }} - variant: ccache - evict-old-files: 1d - - - name: Install Cuda Toolkit - uses: ./.github/actions/windows-setup-cuda - with: - cuda_version: ${{ matrix.cuda }} - - - name: Install Ninja - id: install_ninja - run: | - choco install ninja - - - name: Build - id: cmake_build - shell: cmd - run: | - call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" x64 - cmake -S . 
-B build -G "Ninja Multi-Config" ^ - -DGGML_BACKEND_DL=ON ^ - -DGGML_NATIVE=OFF ^ - -DGGML_CPU=OFF ^ - -DGGML_CUDA=ON ^ - -DLLAMA_CURL=OFF - set /A NINJA_JOBS=%NUMBER_OF_PROCESSORS%-1 - cmake --build build --config Release -j %NINJA_JOBS% --target ggml-cuda - - - name: Pack artifacts - id: pack_artifacts - run: | - 7z a llama-bin-win-cuda-${{ matrix.cuda }}-x64.zip .\build\bin\Release\ggml-cuda.dll - - - name: Upload artifacts - uses: actions/upload-artifact@v4 - with: - path: llama-bin-win-cuda-${{ matrix.cuda }}-x64.zip - name: llama-bin-win-cuda-${{ matrix.cuda }}-x64.zip - - - name: Copy and pack Cuda runtime - run: | - echo "Cuda install location: ${{ env.CUDA_PATH }}" - $dst='.\build\bin\cudart\' - robocopy "${{env.CUDA_PATH}}\bin" $dst cudart64_*.dll cublas64_*.dll cublasLt64_*.dll - robocopy "${{env.CUDA_PATH}}\lib" $dst cudart64_*.dll cublas64_*.dll cublasLt64_*.dll - 7z a cudart-llama-bin-win-cuda-${{ matrix.cuda }}-x64.zip $dst\* - - - name: Upload Cuda runtime - uses: actions/upload-artifact@v4 - with: - path: cudart-llama-bin-win-cuda-${{ matrix.cuda }}-x64.zip - name: cudart-llama-bin-win-cuda-${{ matrix.cuda }}-x64.zip - - windows-sycl: - runs-on: windows-2022 - - defaults: - run: - shell: bash - - env: - WINDOWS_BASEKIT_URL: https://registrationcenter-download.intel.com/akdlm/IRC_NAS/7cd9bba0-7aab-4e30-b3ae-2221006a4a05/intel-oneapi-base-toolkit-2025.1.1.34_offline.exe - WINDOWS_DPCPP_MKL: intel.oneapi.win.cpp-dpcpp-common:intel.oneapi.win.mkl.devel:intel.oneapi.win.dnnl:intel.oneapi.win.tbb.devel - ONEAPI_ROOT: "C:/Program Files (x86)/Intel/oneAPI" - - steps: - - name: Clone - id: checkout - uses: actions/checkout@v4 - - - name: ccache - uses: ggml-org/ccache-action@v1.2.16 - with: - key: windows-latest-cmake-sycl - variant: ccache - evict-old-files: 1d - - - name: Install - run: | - scripts/install-oneapi.bat $WINDOWS_BASEKIT_URL $WINDOWS_DPCPP_MKL - - - name: Build - id: cmake_build - shell: cmd - run: | - call "C:\Program Files 
(x86)\Intel\oneAPI\setvars.bat" intel64 --force - cmake -G "Ninja" -B build ^ - -DCMAKE_C_COMPILER=cl -DCMAKE_CXX_COMPILER=icx ^ - -DCMAKE_BUILD_TYPE=Release ^ - -DGGML_BACKEND_DL=ON -DBUILD_SHARED_LIBS=ON ^ - -DGGML_CPU=OFF -DGGML_SYCL=ON ^ - -DLLAMA_CURL=OFF - cmake --build build --target ggml-sycl -j - - - name: Build the release package - id: pack_artifacts - run: | - echo "cp oneAPI running time dll files in ${{ env.ONEAPI_ROOT }} to ./build/bin" - - cp "${{ env.ONEAPI_ROOT }}/mkl/latest/bin/mkl_sycl_blas.5.dll" ./build/bin - cp "${{ env.ONEAPI_ROOT }}/mkl/latest/bin/mkl_core.2.dll" ./build/bin - cp "${{ env.ONEAPI_ROOT }}/mkl/latest/bin/mkl_tbb_thread.2.dll" ./build/bin - - cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_adapter_level_zero.dll" ./build/bin - cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_adapter_opencl.dll" ./build/bin - cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_loader.dll" ./build/bin - cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_win_proxy_loader.dll" ./build/bin - - cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/sycl8.dll" ./build/bin - cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/svml_dispmd.dll" ./build/bin - cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/libmmd.dll" ./build/bin - cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/libiomp5md.dll" ./build/bin - - cp "${{ env.ONEAPI_ROOT }}/dnnl/latest/bin/dnnl.dll" ./build/bin - cp "${{ env.ONEAPI_ROOT }}/tbb/latest/bin/tbb12.dll" ./build/bin - - echo "cp oneAPI running time dll files to ./build/bin done" - 7z a llama-bin-win-sycl-x64.zip ./build/bin/* - - - name: Upload the release package - uses: actions/upload-artifact@v4 - with: - path: llama-bin-win-sycl-x64.zip - name: llama-bin-win-sycl-x64.zip - - windows-hip: - runs-on: windows-2022 - - strategy: - matrix: - include: - - name: "radeon" - gpu_targets: "gfx1100;gfx1101;gfx1102;gfx1030;gfx1031;gfx1032" - - steps: - - name: Clone - id: checkout - uses: actions/checkout@v4 - - - name: Clone rocWMMA repository - id: 
clone_rocwmma - run: | - git clone https://github.com/rocm/rocwmma --branch rocm-6.2.4 --depth 1 - - - name: ccache - uses: ggml-org/ccache-action@v1.2.16 - with: - key: windows-latest-cmake-hip-${{ matrix.name }}-x64 - evict-old-files: 1d - - - name: Install - id: depends - run: | - $ErrorActionPreference = "Stop" - write-host "Downloading AMD HIP SDK Installer" - Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q3-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe" - write-host "Installing AMD HIP SDK" - $proc = Start-Process "${env:RUNNER_TEMP}\rocm-install.exe" -ArgumentList '-install' -NoNewWindow -PassThru - $proc.WaitForExit(600000) - write-host "Completed AMD HIP SDK installation" - - - name: Verify ROCm - id: verify - run: | - & 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' --version - - - name: Build - id: cmake_build - run: | - $env:HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path) - $env:CMAKE_PREFIX_PATH="${env:HIP_PATH}" - cmake -G "Unix Makefiles" -B build -S . 
` - -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" ` - -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" ` - -DCMAKE_CXX_FLAGS="-I$($PWD.Path.Replace('\', '/'))/rocwmma/library/include/ -Wno-ignored-attributes -Wno-nested-anon-types" ` - -DCMAKE_BUILD_TYPE=Release ` - -DGGML_BACKEND_DL=ON ` - -DGGML_NATIVE=OFF ` - -DGGML_CPU=OFF ` - -DAMDGPU_TARGETS="${{ matrix.gpu_targets }}" ` - -DGGML_HIP_ROCWMMA_FATTN=ON ` - -DGGML_HIP=ON ` - -DLLAMA_CURL=OFF - cmake --build build --target ggml-hip -j ${env:NUMBER_OF_PROCESSORS} - md "build\bin\rocblas\library\" - cp "${env:HIP_PATH}\bin\hipblas.dll" "build\bin\" - cp "${env:HIP_PATH}\bin\rocblas.dll" "build\bin\" - cp "${env:HIP_PATH}\bin\rocblas\library\*" "build\bin\rocblas\library\" - - - name: Pack artifacts - id: pack_artifacts - run: | - 7z a llama-bin-win-hip-${{ matrix.name }}-x64.zip .\build\bin\* - - - name: Upload artifacts - uses: actions/upload-artifact@v4 - with: - path: llama-bin-win-hip-${{ matrix.name }}-x64.zip - name: llama-bin-win-hip-${{ matrix.name }}-x64.zip - - ios-xcode-build: - runs-on: macos-15 - - steps: - - name: Checkout code - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - - name: Setup Xcode - run: | - sudo xcode-select -s /Applications/Xcode_16.4.app - - - name: Build - id: cmake_build - run: | - sysctl -a - cmake -B build -G Xcode \ - -DGGML_METAL_USE_BF16=ON \ - -DGGML_METAL_EMBED_LIBRARY=ON \ - -DLLAMA_CURL=OFF \ - -DLLAMA_BUILD_EXAMPLES=OFF \ - -DLLAMA_BUILD_TOOLS=OFF \ - -DLLAMA_BUILD_TESTS=OFF \ - -DLLAMA_BUILD_SERVER=OFF \ - -DCMAKE_SYSTEM_NAME=iOS \ - -DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \ - -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml - cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO - - - name: xcodebuild for swift package - id: xcodebuild - run: | - ./build-xcframework.sh - - - name: Build Xcode project - run: xcodebuild -project examples/llama.swiftui/llama.swiftui.xcodeproj -scheme llama.swiftui -sdk iphoneos 
CODE_SIGNING_REQUIRED=NO CODE_SIGN_IDENTITY= -destination 'generic/platform=iOS' FRAMEWORK_FOLDER_PATH=./build-ios build - - - name: Determine tag name - id: tag - uses: ./.github/actions/get-tag-name - - - name: Pack artifacts - id: pack_artifacts - run: | - zip --symlinks -r llama-${{ steps.tag.outputs.name }}-xcframework.zip build-apple/llama.xcframework - - - name: Upload artifacts - uses: actions/upload-artifact@v4 - with: - path: llama-${{ steps.tag.outputs.name }}-xcframework.zip - name: llama-${{ steps.tag.outputs.name }}-xcframework - - release: - if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} - - # Fine-grant permission - # https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token - permissions: - contents: write # for creating release - - runs-on: ubuntu-latest - - needs: - - windows - - windows-cpu - - windows-cuda - - windows-sycl - - windows-hip - - ubuntu-22-cpu - - ubuntu-22-vulkan - - macOS-arm64 - - macOS-x64 - - ios-xcode-build - - steps: - - name: Clone - id: checkout - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - - name: Determine tag name - id: tag - uses: ./.github/actions/get-tag-name - - - name: Download artifacts - id: download-artifact - uses: actions/download-artifact@v4 - with: - path: ./artifact - merge-multiple: true - - - name: Move artifacts - id: move_artifacts - run: | - mkdir -p release - - echo "Adding CPU backend files to existing zips..." - for arch in x64 arm64; do - cpu_zip="artifact/llama-bin-win-cpu-${arch}.zip" - temp_dir=$(mktemp -d) - echo "Extracting CPU backend for $arch..." - unzip "$cpu_zip" -d "$temp_dir" - - echo "Adding CPU files to $arch zips..." 
- for target_zip in artifact/llama-bin-win-*-${arch}.zip; do - if [[ "$target_zip" == "$cpu_zip" ]]; then - continue - fi - echo "Adding CPU backend to $(basename "$target_zip")" - realpath_target_zip=$(realpath "$target_zip") - (cd "$temp_dir" && zip -r "$realpath_target_zip" .) - done - - rm -rf "$temp_dir" - done - - echo "Renaming and moving zips to release..." - for zip_file in artifact/llama-bin-win-*.zip; do - base_name=$(basename "$zip_file" .zip) - zip_name="llama-${{ steps.tag.outputs.name }}-${base_name#llama-}.zip" - echo "Moving $zip_file to release/$zip_name" - mv "$zip_file" "release/$zip_name" - done - - echo "Moving other artifacts..." - mv -v artifact/*.zip release - - - name: Create release - id: create_release - uses: ggml-org/action-create-release@v1 - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - with: - tag_name: ${{ steps.tag.outputs.name }} - - - name: Upload release - id: upload_release - uses: actions/github-script@v3 - with: - github-token: ${{secrets.GITHUB_TOKEN}} - script: | - const path = require('path'); - const fs = require('fs'); - const release_id = '${{ steps.create_release.outputs.id }}'; - for (let file of await fs.readdirSync('./release')) { - if (path.extname(file) === '.zip') { - console.log('uploadReleaseAsset', file); - await github.repos.uploadReleaseAsset({ - owner: context.repo.owner, - repo: context.repo.repo, - release_id: release_id, - name: file, - data: await fs.readFileSync(`./release/${file}`) - }); - } - } diff --git a/.github/workflows/server.yml b/.github/workflows/server.yml deleted file mode 100644 index f6da488576937..0000000000000 --- a/.github/workflows/server.yml +++ /dev/null @@ -1,237 +0,0 @@ -# Server build and tests -name: Server - -on: - workflow_dispatch: # allows manual triggering - inputs: - sha: - description: 'Commit SHA1 to build' - required: false - type: string - slow_tests: - description: 'Run slow tests' - required: true - type: boolean - push: - branches: - - master - paths: 
['.github/workflows/server.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'tools/server/**.*'] - pull_request: - types: [opened, synchronize, reopened] - paths: ['.github/workflows/server.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'tools/server/**.*'] - -env: - LLAMA_LOG_COLORS: 1 - LLAMA_LOG_PREFIX: 1 - LLAMA_LOG_TIMESTAMPS: 1 - LLAMA_LOG_VERBOSITY: 10 - -concurrency: - group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || github.run_id }} - cancel-in-progress: true - -jobs: - server: - runs-on: ubuntu-latest - - strategy: - matrix: - sanitizer: [ADDRESS, UNDEFINED] # THREAD is broken - build_type: [RelWithDebInfo] - include: - - build_type: Release - sanitizer: "" - fail-fast: false # While -DLLAMA_SANITIZE_THREAD=ON is broken - - steps: - - name: Dependencies - id: depends - run: | - sudo apt-get update - sudo apt-get -y install \ - build-essential \ - xxd \ - git \ - cmake \ - curl \ - wget \ - language-pack-en \ - libcurl4-openssl-dev - - - name: Clone - id: checkout - uses: actions/checkout@v4 - with: - fetch-depth: 0 - ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }} - - - name: Python setup - id: setup_python - uses: actions/setup-python@v5 - with: - python-version: '3.11' - - - name: Tests dependencies - id: test_dependencies - run: | - pip install -r tools/server/tests/requirements.txt - - # Setup nodejs (to be used for verifying bundled index.html) - - uses: actions/setup-node@v4 - with: - node-version: '22.11.0' - - - name: WebUI - Install dependencies - id: webui_lint - run: | - cd tools/server/webui - npm ci - - - name: WebUI - Check code format - id: webui_format - run: | - git config --global --add safe.directory $(realpath .) 
- cd tools/server/webui - git status - - npm run format - git status - modified_files="$(git status -s)" - echo "Modified files: ${modified_files}" - if [ -n "${modified_files}" ]; then - echo "Files do not follow coding style. To fix: npm run format" - echo "${modified_files}" - exit 1 - fi - - - name: Verify bundled index.html - id: verify_server_index_html - run: | - git config --global --add safe.directory $(realpath .) - cd tools/server/webui - git status - - npm run build - git status - modified_files="$(git status -s)" - echo "Modified files: ${modified_files}" - if [ -n "${modified_files}" ]; then - echo "Repository is dirty or server/webui is not built as expected" - echo "Hint: You may need to follow Web UI build guide in server/README.md" - echo "${modified_files}" - exit 1 - fi - - - name: Build (no OpenMP) - id: cmake_build_no_openmp - if: ${{ matrix.sanitizer == 'THREAD' }} - run: | - cmake -B build \ - -DGGML_NATIVE=OFF \ - -DLLAMA_BUILD_SERVER=ON \ - -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \ - -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON \ - -DGGML_OPENMP=OFF ; - cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target llama-server - - - name: Build (sanitizers) - id: cmake_build_sanitizers - if: ${{ matrix.sanitizer != '' && matrix.sanitizer != 'THREAD' }} - run: | - cmake -B build \ - -DGGML_NATIVE=OFF \ - -DLLAMA_BUILD_SERVER=ON \ - -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \ - -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON ; - cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target llama-server - - - name: Build (sanitizers) - id: cmake_build - if: ${{ matrix.sanitizer == '' }} - run: | - cmake -B build \ - -DGGML_NATIVE=OFF \ - -DLLAMA_BUILD_SERVER=ON \ - -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} ; - cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target llama-server - - - name: Tests - id: server_integration_tests - if: ${{ matrix.sanitizer == '' }} - env: - GITHUB_ACTIONS: "true" - 
run: | - cd tools/server/tests - ./tests.sh - - - name: Tests (sanitizers) - id: server_integration_tests_sanitizers - if: ${{ matrix.sanitizer != '' }} - run: | - cd tools/server/tests - LLAMA_SANITIZE=1 ./tests.sh - - - name: Slow tests - id: server_integration_tests_slow - if: ${{ (github.event.schedule || github.event.inputs.slow_tests == 'true') && matrix.build_type == 'Release' }} - run: | - cd tools/server/tests - SLOW_TESTS=1 ./tests.sh - - - server-windows: - runs-on: windows-2022 - - steps: - - name: Clone - id: checkout - uses: actions/checkout@v4 - with: - fetch-depth: 0 - ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }} - - - name: libCURL - id: get_libcurl - uses: ./.github/actions/windows-setup-curl - - - name: Build - id: cmake_build - env: - CURL_PATH: ${{ steps.get_libcurl.outputs.curl_path }} - run: | - cmake -B build -DCURL_LIBRARY="$env:CURL_PATH/lib/libcurl.dll.a" -DCURL_INCLUDE_DIR="$env:CURL_PATH/include" - cmake --build build --config Release -j ${env:NUMBER_OF_PROCESSORS} --target llama-server - - - name: Python setup - id: setup_python - uses: actions/setup-python@v5 - with: - python-version: '3.11' - - - name: Tests dependencies - id: test_dependencies - run: | - pip install -r tools/server/tests/requirements.txt - - - name: Copy Libcurl - id: prepare_libcurl - env: - CURL_PATH: ${{ steps.get_libcurl.outputs.curl_path }} - run: | - cp $env:CURL_PATH/bin/libcurl-x64.dll ./build/bin/Release/libcurl-x64.dll - - - name: Tests - id: server_integration_tests - if: ${{ !matrix.disabled_on_pr || !github.event.pull_request }} - run: | - cd tools/server/tests - $env:PYTHONIOENCODING = ":replace" - pytest -v -x -m "not slow" - - - name: Slow tests - id: server_integration_tests_slow - if: ${{ (github.event.schedule || github.event.inputs.slow_tests == 'true') && matrix.build_type == 'Release' }} - run: | - cd tools/server/tests - $env:SLOW_TESTS = "1" - pytest -v -x diff 
--git a/.github/workflows/update-ops-docs.yml b/.github/workflows/update-ops-docs.yml deleted file mode 100644 index c0218fa742173..0000000000000 --- a/.github/workflows/update-ops-docs.yml +++ /dev/null @@ -1,40 +0,0 @@ -name: Update Operations Documentation - -on: - push: - paths: - - 'docs/ops/**' - - 'scripts/create_ops_docs.py' - pull_request: - paths: - - 'docs/ops/**' - - 'scripts/create_ops_docs.py' - -jobs: - update-ops-docs: - runs-on: ubuntu-latest - - steps: - - name: Checkout repository - uses: actions/checkout@v4 - - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: '3.x' - - - name: Generate operations documentation to temporary file - run: | - mkdir -p /tmp/ops_check - ./scripts/create_ops_docs.py /tmp/ops_check/ops.md - - - name: Check if docs/ops.md matches generated version - run: | - if ! diff -q docs/ops.md /tmp/ops_check/ops.md; then - echo "Operations documentation (docs/ops.md) is not up to date with the backend CSV files." - echo "To fix: run ./scripts/create_ops_docs.py and commit the updated docs/ops.md along with your changes" - echo "Differences found:" - diff docs/ops.md /tmp/ops_check/ops.md || true - exit 1 - fi - echo "Operations documentation is up to date." 
diff --git a/.github/workflows/vulkan-build.yml b/.github/workflows/vulkan-build.yml new file mode 100644 index 0000000000000..9ddbaf5578199 --- /dev/null +++ b/.github/workflows/vulkan-build.yml @@ -0,0 +1,50 @@ +name: Vulkan Build (No Run) + +on: + push: + branches: ["**"] + pull_request: + +jobs: + build-vulkan: + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + submodules: recursive + + - name: Install Vulkan build dependencies + run: | + sudo apt-get update + # Vulkan SDK headers/libs + tools needed by GGML's Vulkan backend + sudo apt-get install -y \ + libvulkan-dev \ + vulkan-validationlayers-dev \ + spirv-tools \ + libglslang-dev \ + glslang-tools + + - name: Configure (CMake, Vulkan enabled) + run: | + cmake -S . -B build-vk -DCMAKE_BUILD_TYPE=RelWithDebInfo -DGGML_VULKAN=ON + + - name: Build + run: | + cmake --build build-vk -j8 + + - name: Verify build outputs + run: | + ls -la build-vk/bin || true + test -e build-vk/bin/llama-cli + + - name: Show test command (not executed) + run: | + echo "To run (requires Vulkan-capable GPU):" + echo "GGML_VK_PERF_SILENT=1 GGML_VK_PERF_LOGGER=1 LLAMA_PERFETTO_TRACE=./.pftrace build-vk/bin/llama-cli -m .gguf" + + - name: Upload build artifacts + uses: actions/upload-artifact@v4 + with: + name: build-vk-bin + path: build-vk/bin/ diff --git a/.github/workflows/winget.yml b/.github/workflows/winget.yml deleted file mode 100644 index 5c286155951e5..0000000000000 --- a/.github/workflows/winget.yml +++ /dev/null @@ -1,42 +0,0 @@ -name: Update Winget Package - -on: - workflow_dispatch: # allows manual triggering - schedule: - - cron: '28 5 * * *' # Update every day at 5:28 UTC - -jobs: - update: - name: Update Winget Package - runs-on: ubuntu-latest - - steps: - - name: Install cargo binstall - uses: cargo-bins/cargo-binstall@268643a6b5ea099f5718ee5cd3ff7dc89a5eb49b - - - name: Install komac - run: | - cargo binstall komac@2.11.2 -y - - - name: Find latest release - id: 
find_latest_release - uses: actions/github-script@v6 - with: - script: | - const { data: releases } = await github.rest.repos.listReleases({ - owner: context.repo.owner, - repo: context.repo.repo, - }); - console.log("Latest release:", releases[0].tag_name); - return releases[0].tag_name; - - - name: Update manifest - env: - VERSION: ${{ steps.find_latest_release.outputs.result }} - run: | - echo "Updating manifest..." - komac update --version ${{ env.VERSION }} \ - --urls "https://github.com/ggml-org/llama.cpp/releases/download/${{ env.VERSION }}/llama-${{ env.VERSION }}-bin-win-vulkan-x64.zip" \ - --token ${{ secrets.WINGET_GITHUB_TOKEN }} \ - --submit \ - ggml.llamacpp diff --git a/common/common.cpp b/common/common.cpp index 054b43be770da..a8bcb10130e10 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -348,6 +348,9 @@ bool parse_cpu_mask(const std::string & mask, bool (&boolmask)[GGML_MAX_N_THREAD return true; } +extern "C" void llama_perfetto_try_start_from_env(void); +extern "C" void llama_perfetto_stop_flush(void); + void common_init() { llama_log_set([](ggml_log_level level, const char * text, void * /*user_data*/) { if (LOG_DEFAULT_LLAMA <= common_log_verbosity_thold) { @@ -362,6 +365,11 @@ void common_init() { #endif LOG_INF("build: %d (%s) with %s for %s%s\n", LLAMA_BUILD_NUMBER, LLAMA_COMMIT, LLAMA_COMPILER, LLAMA_BUILD_TARGET, build_type); + + // Start Perfetto in-process tracing if requested via env vars. + // This enables trace generation for all tools uniformly. 
+ llama_perfetto_try_start_from_env(); + atexit([](){ llama_perfetto_stop_flush(); }); } std::string common_params_get_system_info(const common_params & params) { diff --git a/ggml/include/ggml-vulkan.h b/ggml/include/ggml-vulkan.h index ed5ea5f798cb5..93bc0f0866883 100644 --- a/ggml/include/ggml-vulkan.h +++ b/ggml/include/ggml-vulkan.h @@ -24,6 +24,11 @@ GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_vk_host_buffer_type(voi GGML_BACKEND_API ggml_backend_reg_t ggml_backend_vk_reg(void); +// Utility: collect basic GPU counters via Vulkan pipeline statistics +// and dump them to a text file at `path`. Returns true on success. +// Currently records `VK_QUERY_PIPELINE_STATISTIC_COMPUTE_SHADER_INVOCATIONS_BIT`. +GGML_BACKEND_API bool ggml_backend_vk_dump_pipeline_stats(int device, const char * path); + #ifdef __cplusplus } #endif diff --git a/ggml/src/ggml-cpu/ggml-cpu.c b/ggml/src/ggml-cpu/ggml-cpu.c index 0d5d3a3440aaf..c033dfa8f2f9b 100644 --- a/ggml/src/ggml-cpu/ggml-cpu.c +++ b/ggml/src/ggml-cpu/ggml-cpu.c @@ -38,6 +38,11 @@ #include #endif +// Weak no-op perfetto shims to avoid link errors when ggml is linked as a shared library +// and the C++ perfetto glue is not part of the ggml target. 
+__attribute__((weak)) void llama_perfetto_trace_begin(const char * name) { (void)name; } +__attribute__((weak)) void llama_perfetto_trace_end(void) { } + #ifdef GGML_USE_OPENMP #include #endif @@ -1754,23 +1759,43 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm } break; case GGML_OP_SILU_BACK: { + // Perfetto: SiLU Backward + #include "../../include/llama_perfetto.h" + llama_perfetto_trace_begin("silu_back"); ggml_compute_forward_silu_back(params, tensor); + llama_perfetto_trace_end(); } break; case GGML_OP_NORM: { + // Perfetto: Norm + #include "../../include/llama_perfetto.h" + llama_perfetto_trace_begin("norm"); ggml_compute_forward_norm(params, tensor); + llama_perfetto_trace_end(); } break; case GGML_OP_RMS_NORM: { + // Perfetto: RMSNorm + #include "../../include/llama_perfetto.h" + llama_perfetto_trace_begin("rms_norm"); ggml_compute_forward_rms_norm(params, tensor); + llama_perfetto_trace_end(); } break; case GGML_OP_RMS_NORM_BACK: { + // Perfetto: RMSNorm Backward + #include "../../include/llama_perfetto.h" + llama_perfetto_trace_begin("rms_norm_back"); ggml_compute_forward_rms_norm_back(params, tensor); + llama_perfetto_trace_end(); } break; case GGML_OP_GROUP_NORM: { + // Perfetto: GroupNorm + #include "../../include/llama_perfetto.h" + llama_perfetto_trace_begin("group_norm"); ggml_compute_forward_group_norm(params, tensor); + llama_perfetto_trace_end(); } break; case GGML_OP_L2_NORM: { @@ -1778,11 +1803,19 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm } break; case GGML_OP_MUL_MAT: { + // Perfetto: MatMul + #include "../../include/llama_perfetto.h" + llama_perfetto_trace_begin("matmul"); ggml_compute_forward_mul_mat(params, tensor); + llama_perfetto_trace_end(); } break; case GGML_OP_MUL_MAT_ID: { + // Perfetto: MatMul (ID) + #include "../../include/llama_perfetto.h" + llama_perfetto_trace_begin("matmul_id"); ggml_compute_forward_mul_mat_id(params, tensor); + 
llama_perfetto_trace_end(); } break; case GGML_OP_OUT_PROD: { @@ -1846,19 +1879,35 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm } break; case GGML_OP_SOFT_MAX: { + // Perfetto: Softmax + #include "../../include/llama_perfetto.h" + llama_perfetto_trace_begin("softmax"); ggml_compute_forward_soft_max(params, tensor); + llama_perfetto_trace_end(); } break; case GGML_OP_SOFT_MAX_BACK: { + // Perfetto: Softmax Backward + #include "../../include/llama_perfetto.h" + llama_perfetto_trace_begin("softmax_back"); ggml_compute_forward_soft_max_ext_back(params, tensor); + llama_perfetto_trace_end(); } break; case GGML_OP_ROPE: { + // Perfetto: RoPE + #include "../../include/llama_perfetto.h" + llama_perfetto_trace_begin("rope"); ggml_compute_forward_rope(params, tensor); + llama_perfetto_trace_end(); } break; case GGML_OP_ROPE_BACK: { + // Perfetto: RoPE Backward + #include "../../include/llama_perfetto.h" + llama_perfetto_trace_begin("rope_back"); ggml_compute_forward_rope_back(params, tensor); + llama_perfetto_trace_end(); } break; case GGML_OP_CLAMP: { @@ -1934,18 +1983,30 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm } break; case GGML_OP_LEAKY_RELU: { + // Perfetto: LeakyReLU + #include "../../include/llama_perfetto.h" + llama_perfetto_trace_begin("leaky_relu"); ggml_compute_forward_leaky_relu(params, tensor); + llama_perfetto_trace_end(); } break; case GGML_OP_FLASH_ATTN_EXT: { + // Perfetto: FlashAttention (fwd) + #include "../../include/llama_perfetto.h" + llama_perfetto_trace_begin("flash_attn"); ggml_compute_forward_flash_attn_ext(params, tensor); + llama_perfetto_trace_end(); } break; case GGML_OP_FLASH_ATTN_BACK: { + // Perfetto: FlashAttention (back) + #include "../../include/llama_perfetto.h" + llama_perfetto_trace_begin("flash_attn_back"); int32_t t = ggml_get_op_params_i32(tensor, 0); GGML_ASSERT(t == 0 || t == 1); bool masked = t != 0; ggml_compute_forward_flash_attn_back(params, 
masked, tensor); + llama_perfetto_trace_end(); } break; case GGML_OP_SSM_CONV: { @@ -1965,11 +2026,21 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm } break; case GGML_OP_UNARY: { + // Perfetto: Unary (may include relu/gelu/silu/etc.) + #include "../../include/llama_perfetto.h" + enum ggml_unary_op uop = ggml_get_unary_op(tensor); + const char * uname = ggml_unary_op_name(uop); + llama_perfetto_trace_begin(uname); ggml_compute_forward_unary(params, tensor); + llama_perfetto_trace_end(); } break; case GGML_OP_GLU: { + // Perfetto: GLU + #include "../../include/llama_perfetto.h" + llama_perfetto_trace_begin("glu"); ggml_compute_forward_glu(params, tensor); + llama_perfetto_trace_end(); } break; case GGML_OP_GET_REL_POS: { diff --git a/ggml/src/ggml-vulkan/CMakeLists.txt b/ggml/src/ggml-vulkan/CMakeLists.txt index b97e7bf995504..c9b14d0b897a8 100644 --- a/ggml/src/ggml-vulkan/CMakeLists.txt +++ b/ggml/src/ggml-vulkan/CMakeLists.txt @@ -46,6 +46,7 @@ if (Vulkan_FOUND) ggml_add_backend_library(ggml-vulkan ggml-vulkan.cpp + llama_perfetto_stubs.c ../../include/ggml-vulkan.h ) diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp index 04ad664e61c07..3921c077d70cb 100644 --- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp +++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -24,6 +25,19 @@ #include #include +// Placeholder GPU counter integration (no-op for now) +#include "../../include/llama_vulkan_prof.h" +// Perfetto CPU-side ML op spans +#include "../../include/llama_perfetto.h" + +// Provide weak no-op GPU counter definitions so ggml (Vulkan) builds without optional counters. 
+extern "C" { +__attribute__((weak)) void llama_vk_counters_begin(void * command_buffer, const char * label) { + (void)command_buffer; (void)label; +} +__attribute__((weak)) void llama_vk_counters_end(void * command_buffer) { (void)command_buffer; } +} + #if defined(_MSC_VER) # define NOMINMAX 1 # include @@ -576,6 +590,20 @@ struct vk_device_struct { vk::QueryPool query_pool; int32_t num_queries; + // Latest per-node GPU timeline + // - start_rel_ns/end_rel_ns: relative to first GPU timestamp (ns) + // - start_abs_mono_ns/end_abs_mono_ns: absolute CPU monotonic-aligned ns via VK_EXT_calibrated_timestamps (0 if unavailable) + struct timeline_entry { uint64_t start_rel_ns, end_rel_ns; uint64_t start_abs_mono_ns, end_abs_mono_ns; int op; }; + std::vector last_timeline; + + // CPU CLOCK_MONOTONIC timestamp captured immediately after the fence wait + // that produced the timeline above. Used to anchor relative timelines to + // the actual CPU time when the GPU completed, avoiding post-fence drift. + uint64_t last_timeline_anchor_mono_ns { 0 }; + + // Whether VK_EXT_calibrated_timestamps was enabled on this device + bool calibrated_ts_enabled { false }; + ~vk_device_struct() { VK_LOG_DEBUG("destroy device " << name); @@ -1138,6 +1166,28 @@ class vk_memory_logger { #define VK_LOG_MEMORY(msg) ((void) 0) #endif // GGML_VULKAN_MEMORY_DEBUG +// Optional performance logging: can write to stderr, a file, or be silenced. 
+// Controlled via env: +// - GGML_VK_PERF_LOGGER=1 enable perf logger collection (required) +// - GGML_VK_PERF_TIMELINE=1 print per-node GPU timeline (optional) +// - GGML_VK_PERF_LOG_FILE=path redirect perf logger output to file +// - GGML_VK_PERF_SILENT=1 suppress printing while still collecting data + +static bool vk_perf_silent = false; +class vk_nullbuf : public std::streambuf { public: int overflow(int c) override { return c; } }; +static vk_nullbuf vk_perf_nullbuf; +static std::unique_ptr vk_perf_log_file; +static std::ostream & vk_perf_stream() { + if (vk_perf_silent) { + static std::ostream null_stream(&vk_perf_nullbuf); + return null_stream; + } + if (vk_perf_log_file && vk_perf_log_file->is_open()) { + return *vk_perf_log_file; + } + return std::cerr; +} + class vk_perf_logger { public: void print_timings() { @@ -1145,14 +1195,14 @@ class vk_perf_logger { return; } uint64_t total_all_op_times = 0; - std::cerr << "----------------\nVulkan Timings:" << std::endl; + vk_perf_stream() << "----------------\nVulkan Timings:" << std::endl; for (const auto & t : timings) { uint64_t total_op_times = 0; for (const auto & time : t.second) { total_op_times += time; } - std::cerr << t.first << ": " << t.second.size() << " x " << (total_op_times / t.second.size() / 1000.0) - << " us"; + vk_perf_stream() << t.first << ": " << t.second.size() << " x " << (total_op_times / t.second.size() / 1000.0) + << " us"; // If we have as many flops entries as timing entries for the op, then compute and log the flops/S. 
auto it = flops.find(t.first); @@ -1161,19 +1211,19 @@ class vk_perf_logger { for (const auto & elem : it->second) { total_op_flops += elem; } - std::cerr << " (" - << (double(total_op_flops) / (1000.0 * 1000.0 * 1000.0)) / - (double(total_op_times) / (1000.0 * 1000.0 * 1000.0)) - << " GFLOPS/s)"; + vk_perf_stream() << " (" + << (double(total_op_flops) / (1000.0 * 1000.0 * 1000.0)) / + (double(total_op_times) / (1000.0 * 1000.0 * 1000.0)) + << " GFLOPS/s)"; } total_all_op_times += total_op_times; - std::cerr << std::endl; + vk_perf_stream() << std::endl; } if (timings.size() > 0) { - std::cerr << "Total time: " << total_all_op_times / 1000.0 << " us." << std::endl; + vk_perf_stream() << "Total time: " << total_all_op_times / 1000.0 << " us." << std::endl; } timings.clear(); @@ -1355,6 +1405,10 @@ struct vk_instance_t { PFN_vkCmdEndDebugUtilsLabelEXT pfn_vkCmdEndDebugUtilsLabelEXT = {}; PFN_vkCmdInsertDebugUtilsLabelEXT pfn_vkCmdInsertDebugUtilsLabelEXT = {}; + // Optional: calibrated timestamps (VK_EXT_calibrated_timestamps) + PFN_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT pfn_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT = {}; + PFN_vkGetCalibratedTimestampsEXT pfn_vkGetCalibratedTimestampsEXT = {}; + std::vector device_indices; vk_device devices[GGML_VK_MAX_DEVICES]; }; @@ -1363,6 +1417,7 @@ static bool vk_instance_initialized = false; static vk_instance_t vk_instance; static bool vk_perf_logger_enabled = false; +static bool vk_perf_timeline_enabled = false; // print raw timestamp spans per op #ifdef GGML_VULKAN_CHECK_RESULTS static size_t vk_skip_checks; @@ -1382,7 +1437,9 @@ static void ggml_vk_wait_for_fence(ggml_backend_vk_context * ctx) { // Use waitForFences while most of the graph executes. Hopefully the CPU can sleep // during this wait. 
if (ctx->almost_ready_fence_pending) { + llama_perfetto_trace_begin("vkWaitForFences(almost_ready)"); VK_CHECK(ctx->device->device.waitForFences({ ctx->almost_ready_fence }, true, UINT64_MAX), "almost_ready_fence"); + llama_perfetto_trace_end(); ctx->device->device.resetFences({ ctx->almost_ready_fence }); ctx->almost_ready_fence_pending = false; } @@ -1594,7 +1651,10 @@ static void ggml_vk_submit(vk_context& ctx, vk::Fence fence) { if (ctx->seqs.empty()) { if (fence) { std::lock_guard guard(queue_mutex); + // Trace the lightweight submit with fence + llama_perfetto_trace_begin("vkQueueSubmit(empty)"); ctx->p->q->queue.submit({}, fence); + llama_perfetto_trace_end(); } return; } @@ -1664,7 +1724,10 @@ static void ggml_vk_submit(vk_context& ctx, vk::Fence fence) { } std::lock_guard guard(queue_mutex); + // Submitting a batch of command buffers to the GPU queue + llama_perfetto_trace_begin("vkQueueSubmit(batch)"); ctx->p->q->queue.submit(submit_infos, fence); + llama_perfetto_trace_end(); ctx->seqs.clear(); } @@ -3707,6 +3770,21 @@ static vk_device ggml_vk_get_device(size_t idx) { vkGetPhysicalDeviceFeatures2(device->physical_device, &device_features2); + // Prefer enabling calibrated timestamps if supported; used to correlate GPU ticks to CPU clock + bool calibrated_ts_support = false; + { + auto dev_exts = device->physical_device.enumerateDeviceExtensionProperties(); + for (const auto & properties : dev_exts) { + if (strcmp("VK_EXT_calibrated_timestamps", properties.extensionName) == 0) { + calibrated_ts_support = true; + break; + } + } + } + if (calibrated_ts_support) { + device_extensions.push_back("VK_EXT_calibrated_timestamps"); + } + device->fp16 = device->fp16 && vk12_features.shaderFloat16; #if defined(VK_KHR_shader_bfloat16) @@ -3717,6 +3795,11 @@ static vk_device ggml_vk_get_device(size_t idx) { device->pipeline_robustness = pl_robustness_features.pipelineRobustness; + // Remember whether we enabled calibrated timestamps + // (If not enabled, timeline 
absolute CPU times will remain 0.) + // We assume device_extensions is applied below when creating the device. + device->calibrated_ts_enabled = calibrated_ts_support; + device->multi_add = vk12_props.shaderRoundingModeRTEFloat16 && device->properties.limits.maxPushConstantsSize >= sizeof(vk_op_multi_add_push_constants) && vk12_features.runtimeDescriptorArray && @@ -3958,6 +4041,14 @@ static vk_device ggml_vk_get_device(size_t idx) { device_create_info.setPNext(&device_features2); device->device = device->physical_device.createDevice(device_create_info); + // Resolve calibrated timestamp function pointers if available + if (calibrated_ts_support) { + vk_instance.pfn_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT = + (PFN_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT) vkGetInstanceProcAddr(vk_instance.instance, "vkGetPhysicalDeviceCalibrateableTimeDomainsEXT"); + vk_instance.pfn_vkGetCalibratedTimestampsEXT = + (PFN_vkGetCalibratedTimestampsEXT) vkGetDeviceProcAddr(device->device, "vkGetCalibratedTimestampsEXT"); + } + // Queues ggml_vk_create_queue(device, device->compute_queue, compute_queue_family_index, 0, { vk::PipelineStageFlagBits::eComputeShader | vk::PipelineStageFlagBits::eTransfer }, false); @@ -4281,7 +4372,19 @@ static void ggml_vk_instance_init() { } - vk_perf_logger_enabled = getenv("GGML_VK_PERF_LOGGER") != nullptr; + vk_perf_logger_enabled = getenv("GGML_VK_PERF_LOGGER") != nullptr; + vk_perf_timeline_enabled = getenv("GGML_VK_PERF_TIMELINE") != nullptr; + // Optional redirection/silencing of perf logger output + const char * perf_silent_env = getenv("GGML_VK_PERF_SILENT"); + vk_perf_silent = perf_silent_env != nullptr; + const char * perf_log_path = getenv("GGML_VK_PERF_LOG_FILE"); + if (!vk_perf_silent && perf_log_path && *perf_log_path) { + vk_perf_log_file = std::make_unique(perf_log_path, std::ios::out | std::ios::trunc); + if (!vk_perf_log_file->good()) { + // Fall back to stderr if opening failed + vk_perf_log_file.reset(); + } + } // 
Emulate behavior of CUDA_VISIBLE_DEVICES for Vulkan char * devices_env = getenv("GGML_VK_VISIBLE_DEVICES"); @@ -4896,7 +4999,19 @@ static void ggml_vk_dispatch_pipeline(ggml_backend_vk_context* ctx, vk_context& 0, { descriptor_set }, {}); + // Begin CPU-side span for this ML dispatch + llama_perfetto_trace_begin(pipeline->name.c_str()); + // Begin GPU region (Perfetto span + placeholder counters) + llama_perfetto_gpu_begin(pipeline->name.c_str()); + llama_vk_counters_begin((void*)subctx->s->buffer, pipeline->name.c_str()); + subctx->s->buffer.dispatch(wg0, wg1, wg2); + + // End GPU region (Perfetto span + placeholder counters) + llama_vk_counters_end((void*)subctx->s->buffer); + llama_perfetto_gpu_end(); + // End CPU-side span for this ML dispatch + llama_perfetto_trace_end(); } static void ggml_vk_end_submission(vk_submission& s, std::vector wait_semaphores, std::vector signal_semaphores) { @@ -5135,7 +5250,9 @@ static void ggml_vk_buffer_write_2d(vk_buffer& dst, size_t offset, const void * } ggml_vk_submit(subctx, dst->device->fence); + llama_perfetto_trace_begin("vkWaitForFences(write_2d)"); VK_CHECK(dst->device->device.waitForFences({ dst->device->fence }, true, UINT64_MAX), "vk_buffer_write_2d waitForFences"); + llama_perfetto_trace_end(); dst->device->device.resetFences({ dst->device->fence }); ggml_vk_queue_command_pools_cleanup(dst->device); } @@ -5222,7 +5339,9 @@ static void ggml_vk_buffer_read(vk_buffer& src, size_t offset, void * dst, size_ ggml_vk_ctx_end(subctx); ggml_vk_submit(subctx, src->device->fence); + llama_perfetto_trace_begin("vkWaitForFences(read)"); VK_CHECK(src->device->device.waitForFences({ src->device->fence }, true, UINT64_MAX), "vk_buffer_read waitForFences"); + llama_perfetto_trace_end(); src->device->device.resetFences({ src->device->fence }); ggml_vk_queue_command_pools_cleanup(src->device); @@ -5252,7 +5371,9 @@ static void ggml_vk_buffer_copy(vk_buffer& dst, size_t dst_offset, vk_buffer& sr ggml_vk_buffer_copy_async(subctx, dst, 
dst_offset, src, src_offset, size); ggml_vk_ctx_end(subctx); ggml_vk_submit(subctx, src->device->fence); + llama_perfetto_trace_begin("vkWaitForFences(copy)"); VK_CHECK(src->device->device.waitForFences({ src->device->fence }, true, UINT64_MAX), "vk_buffer_copy waitForFences"); + llama_perfetto_trace_end(); src->device->device.resetFences({ src->device->fence }); ggml_vk_queue_command_pools_cleanup(src->device); } else { @@ -5286,7 +5407,9 @@ static void ggml_vk_buffer_memset(vk_buffer& dst, size_t offset, uint32_t c, siz ggml_vk_ctx_end(subctx); ggml_vk_submit(subctx, dst->device->fence); + llama_perfetto_trace_begin("vkWaitForFences(memset)"); VK_CHECK(dst->device->device.waitForFences({ dst->device->fence }, true, UINT64_MAX), "vk_memset waitForFences"); + llama_perfetto_trace_end(); dst->device->device.resetFences({ dst->device->fence }); ggml_vk_queue_command_pools_cleanup(dst->device); } @@ -9251,7 +9374,9 @@ static void ggml_vk_test_matmul(ggml_backend_vk_context * ctx, size_t m, size_t auto begin = std::chrono::high_resolution_clock::now(); ggml_vk_submit(subctx, ctx->fence); + llama_perfetto_trace_begin("vkWaitForFences(test_matmul)"); VK_CHECK(ctx->device->device.waitForFences({ ctx->fence }, true, UINT64_MAX), "ggml_vk_test_matmul waitForFences"); + llama_perfetto_trace_end(); ctx->device->device.resetFences({ ctx->fence }); ggml_vk_queue_command_pools_cleanup(ctx->device); @@ -9458,7 +9583,9 @@ static void ggml_vk_test_dequant(ggml_backend_vk_context * ctx, size_t ne, ggml_ auto begin = std::chrono::high_resolution_clock::now(); ggml_vk_submit(subctx, ctx->fence); + llama_perfetto_trace_begin("vkWaitForFences(test_dequant)"); VK_CHECK(ctx->device->device.waitForFences({ ctx->fence }, true, UINT64_MAX), "ggml_vk_test_dequant waitForFences"); + llama_perfetto_trace_end(); ctx->device->device.resetFences({ ctx->fence }); ggml_vk_queue_command_pools_cleanup(ctx->device); @@ -9752,7 +9879,9 @@ static void ggml_vk_test_dequant_matmul(ggml_backend_vk_context * 
ctx, size_t m, auto begin = std::chrono::high_resolution_clock::now(); ggml_vk_submit(subctx, ctx->fence); + llama_perfetto_trace_begin("vkWaitForFences(test_dequant)"); VK_CHECK(ctx->device->device.waitForFences({ ctx->fence }, true, UINT64_MAX), "ggml_vk_test_dequant waitForFences"); + llama_perfetto_trace_end(); ctx->device->device.resetFences({ ctx->fence }); ggml_vk_queue_command_pools_cleanup(ctx->device); @@ -10257,6 +10386,17 @@ static bool ggml_vk_build_graph(ggml_backend_vk_context * ctx, ggml_cgraph * cgr } } + // Begin CPU-side ML span for this high-level op (non-dryrun only) + if (!dryrun) { + const char * ml_name = ggml_op_name(node->op); + if (node->op == GGML_OP_UNARY) { + ml_name = ggml_unary_op_name(ggml_get_unary_op(node)); + } else if (node->op == GGML_OP_GLU) { + ml_name = ggml_glu_op_name(ggml_get_glu_op(node)); + } + llama_perfetto_trace_begin(ml_name); + } + switch (node->op) { case GGML_OP_REPEAT: ggml_vk_repeat(ctx, compute_ctx, src0, node, dryrun); @@ -10553,6 +10693,8 @@ static bool ggml_vk_build_graph(ggml_backend_vk_context * ctx, ggml_cgraph * cgr } } + // End CPU-side ML span + llama_perfetto_trace_end(); return true; } @@ -10685,10 +10827,14 @@ static bool ggml_vk_compute_forward(ggml_backend_vk_context * ctx, ggml_cgraph * } if (almost_ready && !ctx->almost_ready_fence_pending && !use_fence) { + llama_perfetto_trace_begin("vkQueueSubmit(almost_ready)"); ggml_vk_submit(subctx, ctx->almost_ready_fence); + llama_perfetto_trace_end(); ctx->almost_ready_fence_pending = true; } else { + llama_perfetto_trace_begin("vkQueueSubmit(compute)"); ggml_vk_submit(subctx, use_fence ? 
ctx->fence : vk::Fence{}); + llama_perfetto_trace_end(); } if (use_fence) { @@ -11407,15 +11553,96 @@ static ggml_status ggml_backend_vk_graph_compute(ggml_backend_t backend, ggml_cg ggml_vk_ctx_end(compute_ctx); ggml_vk_submit(compute_ctx, ctx->device->fence); + llama_perfetto_trace_begin("vkWaitForFences(perf)"); VK_CHECK(ctx->device->device.waitForFences({ ctx->device->fence }, true, UINT64_MAX), "GGML_VULKAN_PERF waitForFences"); + // Capture a CPU monotonic anchor at the moment the GPU work is known complete +#if !defined(_WIN32) + timespec ts_anchor{}; clock_gettime(CLOCK_MONOTONIC, &ts_anchor); + ctx->device->last_timeline_anchor_mono_ns = uint64_t(ts_anchor.tv_sec) * 1000000000ull + uint64_t(ts_anchor.tv_nsec); +#else + ctx->device->last_timeline_anchor_mono_ns = 0; // TODO: add QPC-based anchor on Windows +#endif + llama_perfetto_trace_end(); ctx->device->device.resetFences({ ctx->device->fence }); // Get the results and pass them to the logger std::vector timestamps(cgraph->n_nodes + 1); VK_CHECK(ctx->device->device.getQueryPoolResults(ctx->device->query_pool, 0, cgraph->n_nodes + 1, (cgraph->n_nodes + 1)*sizeof(uint64_t), timestamps.data(), sizeof(uint64_t), vk::QueryResultFlagBits::e64 | vk::QueryResultFlagBits::eWait), "get timestamp results"); + // Save a simple timeline snapshot for external consumers (relative time domain) + ctx->device->last_timeline.clear(); + const uint64_t t0 = timestamps[0]; + // Optionally compute an absolute CPU-monotonic aligned timeline via calibrated timestamps + bool abs_timeline_ok = false; + uint64_t cpu_mono_now_ns = 0; + uint64_t gpu_now_ticks = 0; + if (ctx->device->calibrated_ts_enabled && + vk_instance.pfn_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT && + vk_instance.pfn_vkGetCalibratedTimestampsEXT) { + // Query calibrateable time domains + uint32_t count = 0; + VkResult r = vk_instance.pfn_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT(ctx->device->physical_device, &count, nullptr); + if (r == VK_SUCCESS && 
count > 0) { + std::vector domains(count); + r = vk_instance.pfn_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT(ctx->device->physical_device, &count, domains.data()); + if (r == VK_SUCCESS) { + bool have_mono = false; + for (auto d : domains) { if (d == VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT) { have_mono = true; break; } } + if (have_mono) { + VkCalibratedTimestampInfoEXT infos[2] = {}; + infos[0].sType = VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT; + infos[0].timeDomain = VK_TIME_DOMAIN_DEVICE_EXT; + infos[1].sType = VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT; + infos[1].timeDomain = VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT; + uint64_t ts_out[2] = {0,0}; + uint64_t max_dev = 0; + r = vk_instance.pfn_vkGetCalibratedTimestampsEXT(ctx->device->device, 2, infos, ts_out, &max_dev); + if (r == VK_SUCCESS) { + gpu_now_ticks = ts_out[0]; + cpu_mono_now_ns = ts_out[1]; + abs_timeline_ok = true; + } + } + } + } + } for (int i = 0; i < cgraph->n_nodes; i++) { if (!ggml_vk_is_empty(cgraph->nodes[i])) { ctx->device->perf_logger->log_timing(cgraph->nodes[i], uint64_t((timestamps[i+1] - timestamps[i]) * ctx->device->properties.limits.timestampPeriod)); + vk_device_struct::timeline_entry e; + e.start_rel_ns = uint64_t((timestamps[i] - t0) * ctx->device->properties.limits.timestampPeriod); + e.end_rel_ns = uint64_t((timestamps[i+1] - t0) * ctx->device->properties.limits.timestampPeriod); + if (abs_timeline_ok) { + const double period_ns = ctx->device->properties.limits.timestampPeriod; + // Map each GPU tick to CPU monotonic ns using the calibrated pair (gpu_now_ticks -> cpu_mono_now_ns) + const double start_ns = double(cpu_mono_now_ns) - period_ns * double(gpu_now_ticks - timestamps[i]); + const double end_ns = double(cpu_mono_now_ns) - period_ns * double(gpu_now_ticks - timestamps[i+1]); + e.start_abs_mono_ns = (start_ns <= 0.0) ? 0ULL : (uint64_t) start_ns; + e.end_abs_mono_ns = (end_ns <= 0.0) ? 
0ULL : (uint64_t) end_ns; + } else { + e.start_abs_mono_ns = 0; + e.end_abs_mono_ns = 0; + } + e.op = cgraph->nodes[i]->op; + ctx->device->last_timeline.push_back(e); + } + } + + // Optionally print a timestamp timeline for span reconstruction + if (vk_perf_timeline_enabled) { + const double period_ns = ctx->device->properties.limits.timestampPeriod; + const double base_ns = double(timestamps[0]) * period_ns; + for (int i = 0; i < cgraph->n_nodes; i++) { + if (!ggml_vk_is_empty(cgraph->nodes[i])) { + const double start_ns = double(timestamps[i]) * period_ns - base_ns; + const double end_ns = double(timestamps[i+1]) * period_ns - base_ns; + const double start_us = start_ns / 1000.0; + const double end_us = end_ns / 1000.0; + const char * op_name = ggml_op_name(cgraph->nodes[i]->op); + vk_perf_stream() << "[GPU-TL] #" << i << " " << op_name + << " start_us=" << std::fixed << std::setprecision(3) << start_us + << " end_us=" << std::fixed << std::setprecision(3) << end_us + << std::endl; + } } } @@ -11497,6 +11724,120 @@ void ggml_backend_vk_get_device_memory(int device, size_t * free, size_t * total } } +// Collect basic GPU counters using Vulkan pipeline statistics and dump to a file. +// This records the number of compute shader invocations in an empty pass, which +// is generally 0 unless wrapped around work. It is intended as a minimal example +// to be expanded to surround real workloads. +extern "C" bool ggml_backend_vk_dump_pipeline_stats(int device, const char * path) { + if (path == nullptr) return false; + if (device < 0 || device >= (int) vk_instance.device_indices.size()) return false; + + try { + // Resolve device and queue + int idx = vk_instance.device_indices[device]; + vk_device dev = ggml_vk_get_device(idx); + vk::PhysicalDevice phys = vk_instance.instance.enumeratePhysicalDevices()[idx]; + + // Check support for pipeline statistics queries; MoltenVK typically does not support this. 
+ vk::PhysicalDeviceFeatures feats = phys.getFeatures(); + if (!feats.pipelineStatisticsQuery) { + return false; + } + + // Create a pipeline statistics query pool for compute shader invocations + vk::QueryPipelineStatisticFlags stats = vk::QueryPipelineStatisticFlagBits::eComputeShaderInvocations; + vk::QueryPoolCreateInfo qp_info({}, vk::QueryType::ePipelineStatistics, 1 /* queries */, stats); + vk::QueryPool qp = dev->device.createQueryPool(qp_info); + + // Reset query on host (promoted in Vulkan 1.2 or via EXT_host_query_reset) + dev->device.resetQueryPool(qp, 0, 1); + + // Allocate one-time command buffer on compute queue's pool + vk::CommandBufferAllocateInfo alloc_info(dev->compute_queue.cmd_pool.pool, vk::CommandBufferLevel::ePrimary, 1); + auto cbs = dev->device.allocateCommandBuffers(alloc_info); + vk::CommandBuffer cb = cbs[0]; + + vk::CommandBufferBeginInfo begin_info(vk::CommandBufferUsageFlagBits::eOneTimeSubmit); + cb.begin(begin_info); + cb.beginQuery(qp, 0, {}); + // Intentionally no work here; extend by inserting dispatches around begin/endQuery. + cb.endQuery(qp, 0); + cb.end(); + + // Submit and wait + vk::SubmitInfo si({}, {}, cb, {}); + dev->compute_queue.queue.submit(si, {}); + dev->compute_queue.queue.waitIdle(); + + // Read back results (single 64-bit value) + uint64_t value = 0; + vk::Result res = dev->device.getQueryPoolResults(qp, 0, 1, sizeof(uint64_t), &value, sizeof(uint64_t), vk::QueryResultFlagBits::e64 | vk::QueryResultFlagBits::eWait); + + // Destroy resources + dev->device.freeCommandBuffers(dev->compute_queue.cmd_pool.pool, cb); + dev->device.destroyQueryPool(qp); + + if (res != vk::Result::eSuccess) { + return false; + } + + // Dump to file + std::ofstream ofs(path); + if (!ofs.good()) return false; + ofs << "compute_shader_invocations: " << value << "\n"; + ofs.close(); + return true; + } catch (const std::exception&) { + return false; + } +} + +// Dump the latest timestamp-derived timeline to CSV for external tools. 
+// Format: start_rel_ns,end_rel_ns,op_name +extern "C" bool ggml_backend_vk_dump_timeline(int device, const char * path) { + if (!path) return false; + if (device < 0 || device >= (int) vk_instance.device_indices.size()) return false; + try { + int idx = vk_instance.device_indices[device]; + vk_device dev = ggml_vk_get_device(idx); + if (dev->last_timeline.empty()) return false; + std::ofstream ofs(path); + if (!ofs.good()) return false; + for (const auto & e : dev->last_timeline) { + ofs << e.start_rel_ns << "," << e.end_rel_ns << "," << ggml_op_name((ggml_op)e.op) << "\n"; + } + ofs.close(); + return true; + } catch (...) { + return false; + } +} + +// Dump the latest CPU-monotonic aligned timeline to CSV for external tools. +// Format: start_abs_ns,end_abs_ns,op_name +extern "C" bool ggml_backend_vk_dump_timeline_abs(int device, const char * path) { + if (!path) return false; + if (device < 0 || device >= (int) vk_instance.device_indices.size()) return false; + try { + int idx = vk_instance.device_indices[device]; + vk_device dev = ggml_vk_get_device(idx); + if (dev->last_timeline.empty()) return false; + // Ensure we have absolute times populated + bool any_abs = false; + for (const auto & e : dev->last_timeline) { if (e.start_abs_mono_ns || e.end_abs_mono_ns) { any_abs = true; break; } } + if (!any_abs) return false; + std::ofstream ofs(path); + if (!ofs.good()) return false; + for (const auto & e : dev->last_timeline) { + ofs << e.start_abs_mono_ns << "," << e.end_abs_mono_ns << "," << ggml_op_name((ggml_op)e.op) << "\n"; + } + ofs.close(); + return true; + } catch (...) 
{ + return false; + } +} + ////////////////////////// struct ggml_backend_vk_device_context { @@ -12708,3 +13049,10 @@ static void ggml_vk_check_results_1(ggml_backend_vk_context * ctx, ggml_cgraph * #endif GGML_BACKEND_DL_IMPL(ggml_backend_vk_reg) +// Return the CPU CLOCK_MONOTONIC anchor in ns captured right after the fence wait +extern "C" uint64_t ggml_backend_vk_get_timeline_anchor_mono_ns(int device) { + if (device < 0 || device >= (int) vk_instance.device_indices.size()) return 0ULL; + int idx = vk_instance.device_indices[device]; + vk_device dev = ggml_vk_get_device(idx); + return dev ? dev->last_timeline_anchor_mono_ns : 0ULL; +} diff --git a/ggml/src/ggml-vulkan/llama_perfetto_stubs.c b/ggml/src/ggml-vulkan/llama_perfetto_stubs.c new file mode 100644 index 0000000000000..e65548c415734 --- /dev/null +++ b/ggml/src/ggml-vulkan/llama_perfetto_stubs.c @@ -0,0 +1,36 @@ +// Weak stubs for Perfetto tracing symbols used by ggml-vulkan. +// These allow building the Vulkan backend without linking the full +// llama Perfetto implementation. If the real implementations are +// linked elsewhere (e.g., from src/llama_perfetto.cpp), they will +// override these weak symbols. + +#include + +#if defined(__GNUC__) || defined(__clang__) +#define WEAK __attribute__((weak)) +#else +#define WEAK +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +WEAK void llama_perfetto_trace_begin(const char * name) { + (void)name; +} + +WEAK void llama_perfetto_trace_end(void) { +} + +WEAK void llama_perfetto_gpu_begin(const char * name) { + (void)name; +} + +WEAK void llama_perfetto_gpu_end(void) { +} + +#ifdef __cplusplus +} +#endif + diff --git a/include/llama_perfetto.h b/include/llama_perfetto.h new file mode 100644 index 0000000000000..e6da5cc916430 --- /dev/null +++ b/include/llama_perfetto.h @@ -0,0 +1,47 @@ +// Lightweight C-callable shim for Perfetto tracepoints used across C and C++. +// If Perfetto SDK is not available or tracing is disabled, these become no-ops. 
+ +#pragma once + +#ifdef __cplusplus +extern "C" { +#endif + +// Begin a CPU trace span for ML operation `name`. +void llama_perfetto_trace_begin(const char * name); + +// Begin a CPU trace span and attach a string argument named "text". +// Useful for spans like "decode" where we want to see the token string. +void llama_perfetto_trace_begin_with_text(const char * name, const char * text); + +// End the most recent CPU trace span started with begin. +void llama_perfetto_trace_end(void); + +// GPU span helpers (Vulkan compute regions) +void llama_perfetto_gpu_begin(const char * name); +void llama_perfetto_gpu_end(void); + +// Optional: start/stop a trace session writing TrackEvent data to a file. +// If you prefer env control, call `llama_perfetto_try_start_from_env()`. +void llama_perfetto_start_trace(const char * path); +void llama_perfetto_stop_flush(void); +// Flushes the active tracing session (if any) and writes Vulkan stats +// without stopping the tracing session. Safe to call multiple times. +void llama_perfetto_flush_dump_stats(void); +// Prints Vulkan GPU counters to stdout if available. +void llama_perfetto_print_gpu_stats(void); +// Emits a GPU timeline track into the Perfetto trace (if tracing). +// Uses the latest Vulkan timestamp batch and anchors it to the current +// trace clock so spans end at "now" preserving relative shape. +void llama_perfetto_emit_gpu_timeline(void); +void llama_perfetto_try_start_from_env(void); + +// Counter helpers +// Emits a Perfetto counter sample for tokens per second (throughput). +void llama_perfetto_counter_tokens_per_s(double tokens_per_s); +// Optionally, emit GPU busy percent [0..100]. Usually called internally. 
+void llama_perfetto_counter_gpu_busy(double percent); + +#ifdef __cplusplus +} +#endif diff --git a/include/llama_vulkan_prof.h b/include/llama_vulkan_prof.h new file mode 100644 index 0000000000000..c4315319f6f2f --- /dev/null +++ b/include/llama_vulkan_prof.h @@ -0,0 +1,18 @@ +// Placeholder Vulkan GPU performance counter collection API. +// Currently no-ops; wired in ggml-vulkan backend for future enablement. + +#pragma once + +#ifdef __cplusplus +extern "C" { +#endif + +// Opaque handle to command buffer (as void* to avoid including Vulkan headers here). +// `label` is a short name for the region, e.g., pipeline name. +void llama_vk_counters_begin(void * command_buffer, const char * label); +void llama_vk_counters_end(void * command_buffer); + +#ifdef __cplusplus +} +#endif + diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 18cfc76564d36..bcb770618893e 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -35,6 +35,8 @@ add_library(llama unicode-data.cpp unicode.cpp unicode.h + profiler/llama_perfetto.cpp + llama_vulkan_prof.cpp ) target_include_directories(llama PRIVATE .) @@ -48,3 +50,17 @@ if (BUILD_SHARED_LIBS) target_compile_definitions(llama PRIVATE LLAMA_BUILD) target_compile_definitions(llama PUBLIC LLAMA_SHARED) endif() + +# +# Perfetto SDK (amalgamated) integration +# +# The SDK is vendored under `src/profiler` as `perfetto.cc/.h`. +# Build it into a static lib and link it so tracepoints are available across platforms +# (macOS, Linux, Android; incl. Vulkan/Mali). 
+add_library(perfetto_sdk STATIC profiler/perfetto.cc)
+
+target_include_directories(perfetto_sdk PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/profiler) # relative to this directory so add_subdirectory() consumers resolve it too
+target_compile_features(perfetto_sdk PRIVATE cxx_std_17)
+
+# Link Perfetto into the core library for all builds
+target_link_libraries(llama PRIVATE perfetto_sdk)
diff --git a/src/llama_vulkan_prof.cpp b/src/llama_vulkan_prof.cpp
new file mode 100644
index 0000000000000..a3c5740280f37
--- /dev/null
+++ b/src/llama_vulkan_prof.cpp
@@ -0,0 +1,15 @@
+// Stub implementation of Vulkan GPU performance counter collection.
+// Filled in later for platforms/extensions that support it.
+
+#include
+#include "../include/llama_vulkan_prof.h"
+
+extern "C" void llama_vk_counters_begin(void * command_buffer, const char * label) {
+    (void)command_buffer;
+    (void)label;
+}
+
+extern "C" void llama_vk_counters_end(void * command_buffer) {
+    (void)command_buffer;
+}
+
diff --git a/src/profiler/README.md b/src/profiler/README.md
new file mode 100644
index 0000000000000..2169c66685fa6
--- /dev/null
+++ b/src/profiler/README.md
@@ -0,0 +1,10 @@
+Profiler integration for llama.cpp
+----------------------------------
+
+- `llama_perfetto.cpp`: C/C++ glue exposing simple C-callable helpers used across the codebase.
+- `perfetto.cc` / `perfetto.h`: Perfetto SDK amalgamation (vendored).
+
+Notes
+- The integration is backend-agnostic and works with CPU-only builds.
+- For Vulkan, optional counters/timeline hooks are resolved dynamically at runtime. This covers
+  platforms like macOS, Linux (including Mali GPUs), and Android.
diff --git a/src/profiler/llama_perfetto.cpp b/src/profiler/llama_perfetto.cpp
new file mode 100644
index 0000000000000..44ddb35a0b282
--- /dev/null
+++ b/src/profiler/llama_perfetto.cpp
@@ -0,0 +1,434 @@
+// Perfetto C++ glue for C-callable trace shims.
+ +#include +#include +#include +#include +#include +#include +#include +#if defined(_WIN32) +#define WIN32_LEAN_AND_MEAN +#ifndef NOMINMAX +#define NOMINMAX +#endif +#include +#endif +#if !defined(_WIN32) +#include +#include +#endif +#include +#include +#include +#include +#include + +#include "perfetto.h" +#include "llama_perfetto.h" + +// TrackEvent setup: define categories first, then static storage. +PERFETTO_DEFINE_CATEGORIES( + perfetto::Category("ML").SetDescription("High-level ML ops (CPU)"), + perfetto::Category("GPU").SetDescription("Vulkan compute dispatches") +); +PERFETTO_TRACK_EVENT_STATIC_STORAGE(); + +static void llama_perfetto_init_once() { + static std::once_flag once; + std::call_once(once, [] { + perfetto::TracingInitArgs args; + // Ensure an in-process backend is available for local tracing. + args.backends = perfetto::BackendType::kInProcessBackend; + perfetto::Tracing::Initialize(args); + perfetto::TrackEvent::Register(); + }); +} + +extern "C" void llama_perfetto_trace_begin(const char * name) { + llama_perfetto_init_once(); + if (name == nullptr) name = "op"; + TRACE_EVENT_BEGIN("ML", perfetto::DynamicString(name)); +} + +extern "C" void llama_perfetto_trace_begin_with_text(const char * name, const char * text) { + llama_perfetto_init_once(); + if (name == nullptr) name = "op"; + const char * arg = text ? text : ""; + // Attach the token string as an argument named "text". 
+ TRACE_EVENT_BEGIN("ML", perfetto::DynamicString(name), + "text", perfetto::DynamicString(arg)); +} + +extern "C" void llama_perfetto_trace_end(void) { + llama_perfetto_init_once(); + TRACE_EVENT_END("ML"); +} + +extern "C" void llama_perfetto_gpu_begin(const char * name) { + llama_perfetto_init_once(); + if (name == nullptr) name = "vk_dispatch"; + TRACE_EVENT_BEGIN("GPU", perfetto::DynamicString(name)); +} + +extern "C" void llama_perfetto_gpu_end(void) { + llama_perfetto_init_once(); + TRACE_EVENT_END("GPU"); +} + +// In-process trace session management +static std::unique_ptr g_session; +static std::atomic g_flush_stop{true}; +static std::thread g_flush_thread; +static int g_trace_fd = -1; +static std::string g_trace_path; + +// Optional Vulkan stats hooks resolved dynamically when ggml-vulkan is loaded. +using fn_vk_dump_stats_t = bool (*)(int, const char *); +using fn_vk_dump_timeline_t = bool (*)(int, const char *); +using fn_vk_dump_timeline_abs_t = bool (*)(int, const char *); +using fn_vk_get_anchor_mono_ns_t = uint64_t (*)(int); +using fn_vk_get_desc_t = void (*)(int, char *, size_t); +using fn_vk_get_mem_t = void (*)(int, size_t *, size_t *); + +static std::atomic g_vk_syms_resolved{false}; +static fn_vk_dump_stats_t g_vk_dump_stats = nullptr; +static fn_vk_get_desc_t g_vk_get_desc = nullptr; +static fn_vk_get_mem_t g_vk_get_mem = nullptr; +static fn_vk_dump_timeline_t g_vk_dump_timeline = nullptr; +static fn_vk_dump_timeline_abs_t g_vk_dump_timeline_abs = nullptr; +static fn_vk_get_anchor_mono_ns_t g_vk_get_anchor_mono_ns = nullptr; + +static void llama_perfetto_resolve_vk_syms_once() { + bool expected = false; + if (!g_vk_syms_resolved.compare_exchange_strong(expected, true)) return; +#if defined(_WIN32) + // Best-effort: resolve from current process address space. 
+ HMODULE self = GetModuleHandleA(nullptr); + if (self) { + g_vk_dump_stats = reinterpret_cast(GetProcAddress(self, "ggml_backend_vk_dump_pipeline_stats")); + g_vk_get_desc = reinterpret_cast(GetProcAddress(self, "ggml_backend_vk_get_device_description")); + g_vk_get_mem = reinterpret_cast(GetProcAddress(self, "ggml_backend_vk_get_device_memory")); + g_vk_dump_timeline = reinterpret_cast(GetProcAddress(self, "ggml_backend_vk_dump_timeline")); + g_vk_dump_timeline_abs = reinterpret_cast(GetProcAddress(self, "ggml_backend_vk_dump_timeline_abs")); + g_vk_get_anchor_mono_ns = reinterpret_cast(GetProcAddress(self, "ggml_backend_vk_get_timeline_anchor_mono_ns")); + } +#else + g_vk_dump_stats = reinterpret_cast(dlsym(RTLD_DEFAULT, "ggml_backend_vk_dump_pipeline_stats")); + g_vk_get_desc = reinterpret_cast(dlsym(RTLD_DEFAULT, "ggml_backend_vk_get_device_description")); + g_vk_get_mem = reinterpret_cast(dlsym(RTLD_DEFAULT, "ggml_backend_vk_get_device_memory")); + g_vk_dump_timeline = reinterpret_cast(dlsym(RTLD_DEFAULT, "ggml_backend_vk_dump_timeline")); + g_vk_dump_timeline_abs = reinterpret_cast(dlsym(RTLD_DEFAULT, "ggml_backend_vk_dump_timeline_abs")); + g_vk_get_anchor_mono_ns = reinterpret_cast(dlsym(RTLD_DEFAULT, "ggml_backend_vk_get_timeline_anchor_mono_ns")); +#endif +} + +extern "C" void llama_perfetto_start_trace(const char * path) { + if (g_session) { + // Already started; ignore duplicate start. 
+ return; + } + if (!path || !*path) return; + llama_perfetto_init_once(); + + perfetto::TraceConfig cfg; + cfg.add_buffers()->set_size_kb(1024 * 64); + auto * ds = cfg.add_data_sources(); + ds->mutable_config()->set_name("track_event"); + // Enable all TrackEvent categories ("*") so our ML/GPU spans are recorded + perfetto::protos::gen::TrackEventConfig te; + te.add_enabled_categories("*"); + ds->mutable_config()->set_track_event_config_raw(te.SerializeAsString()); + + g_session = perfetto::Tracing::NewTrace(); + // open file and pass fd to Setup so Perfetto writes directly into it + int fd = -1; +#if defined(_WIN32) + // Omit Windows fd handling in this patch +#else + fd = ::open(path, O_CREAT | O_TRUNC | O_WRONLY, 0644); +#endif + g_session->Setup(cfg, fd); + g_session->StartBlocking(); + + g_trace_fd = fd; + g_trace_path = path; + g_flush_stop = false; + // Background flusher to minimize data loss on abrupt termination (e.g., SIGKILL). + g_flush_thread = std::thread([]{ + using namespace std::chrono_literals; + while (!g_flush_stop.load(std::memory_order_relaxed)) { + std::this_thread::sleep_for(200ms); + if (g_session) { + g_session->FlushBlocking(0); + } + if (g_trace_fd != -1) { + ::fsync(g_trace_fd); + } + } + }); +} + +extern "C" void llama_perfetto_stop_flush(void) { + g_flush_stop = true; + if (g_flush_thread.joinable()) g_flush_thread.join(); + if (g_session) { + g_session->FlushBlocking(0); + g_session->StopBlocking(); + g_session.reset(); + } + if (g_trace_fd != -1) { + ::fsync(g_trace_fd); + ::close(g_trace_fd); + g_trace_fd = -1; + } + + // If Vulkan backend is present, dump basic GPU pipeline stats alongside the Perfetto file. + llama_perfetto_resolve_vk_syms_once(); + if (!g_trace_path.empty() && g_vk_dump_stats) { + std::string stats_path = g_trace_path + ".vkstats"; + // Use logical device index 0 by default; adjust if multi-GPU selection is exposed. 
+ (void)g_vk_dump_stats(0, stats_path.c_str()); + } +} + +extern "C" void llama_perfetto_flush_dump_stats(void) { + // Best-effort flush of trace buffers without stopping the session. + if (g_session) { + g_session->FlushBlocking(0); + } + if (g_trace_fd != -1) { + ::fsync(g_trace_fd); + } + // Emit Vulkan pipeline stats next to the trace file if available. + llama_perfetto_resolve_vk_syms_once(); + if (!g_trace_path.empty() && g_vk_dump_stats) { + std::string stats_path = g_trace_path + ".vkstats"; + (void)g_vk_dump_stats(0, stats_path.c_str()); + } +} + +static std::string llama_perfetto_tmp_stats_path() { + // Prefer trace-adjacent path if tracing is active, else temp dir. + if (!g_trace_path.empty()) { + return g_trace_path + ".vkstats"; + } + try { + auto dir = std::filesystem::temp_directory_path(); + auto ts = std::chrono::duration_cast( + std::chrono::system_clock::now().time_since_epoch()).count(); + // Avoid Windows headers; pid is best-effort for uniqueness +#if defined(_WIN32) + int pid = 0; +#else + int pid = (int) getpid(); +#endif + return (dir / ("llama_vkstats_" + std::to_string(pid) + "_" + std::to_string(ts) + ".txt")).string(); + } catch (...) { + return "llama_vkstats.txt"; // fallback to cwd + } +} + +extern "C" void llama_perfetto_print_gpu_stats(void) { + static std::atomic warned{false}; + llama_perfetto_resolve_vk_syms_once(); + if (!g_vk_dump_stats) { + return; + } + std::string path = llama_perfetto_tmp_stats_path(); + if (!g_vk_dump_stats(0, path.c_str())) { + // Feature not supported or failure; print a one-time notice with minimal device info. + bool expected = false; + if (warned.compare_exchange_strong(expected, true)) { + char desc[256] = {0}; + if (g_vk_get_desc) { + g_vk_get_desc(0, desc, sizeof(desc)); + } + size_t free_b = 0, total_b = 0; + if (g_vk_get_mem) { + g_vk_get_mem(0, &free_b, &total_b); + } + fprintf(stdout, + "[GPU] Vulkan pipeline statistics not supported on this device%s%s.\n", + desc[0] ? 
": " : "", + desc); + if (total_b) { + fprintf(stdout, "[GPU] Reported device-local memory: %.2f GiB.\n", (double) total_b / (1024.0*1024.0*1024.0)); + } + } + return; + } + std::ifstream ifs(path); + if (!ifs.good()) { + return; + } + std::string line; + while (std::getline(ifs, line)) { + // Print raw line; main uses stdout redirection already + fprintf(stdout, "[GPU] %s\n", line.c_str()); + } + ifs.close(); + // Best effort cleanup for temp file when not using the trace-adjacent name + if (g_trace_path.empty()) { + std::error_code ec; + std::filesystem::remove(path, ec); + } +} + +extern "C" void llama_perfetto_counter_tokens_per_s(double tokens_per_s) { + llama_perfetto_init_once(); + // If tracing is not active, this is still a no-op cost-wise + TRACE_COUNTER("ML", "tokens_per_s", tokens_per_s); +} + +extern "C" void llama_perfetto_counter_gpu_busy(double percent) { + llama_perfetto_init_once(); + TRACE_COUNTER("GPU", "gpu_busy_percent", percent); +} + +extern "C" void llama_perfetto_emit_gpu_timeline(void) { + // Only emit if tracing is active and we can get a timeline snapshot. + if (!g_session) return; + llama_perfetto_resolve_vk_syms_once(); + if (!g_vk_dump_timeline) return; + + // Prefer absolute CPU-monotonic-aligned timeline if available + bool emitted = false; + if (g_vk_dump_timeline_abs) { + std::string path_abs = llama_perfetto_tmp_stats_path(); + if (path_abs.size() >= 8) path_abs.replace(path_abs.size() - 7, 7, "vktimeline.abs"); + if (g_vk_dump_timeline_abs(0, path_abs.c_str())) { + std::ifstream ifsa(path_abs); + if (ifsa.good()) { + struct AbsEntry { std::string name; uint64_t s_abs; uint64_t e_abs; }; + std::vector aentries; aentries.reserve(256); + std::string line; + while (std::getline(ifsa, line)) { + size_t c1 = line.find(','); + size_t c2 = line.find(',', c1 == std::string::npos ? 
0 : c1 + 1); + if (c1 == std::string::npos || c2 == std::string::npos) continue; + uint64_t s = strtoull(line.substr(0, c1).c_str(), nullptr, 10); + uint64_t e = strtoull(line.substr(c1 + 1, c2 - c1 - 1).c_str(), nullptr, 10); + std::string name = line.substr(c2 + 1); + if (e > s && s != 0ULL) aentries.push_back({std::move(name), s, e}); + } + ifsa.close(); + { std::error_code ec; std::filesystem::remove(path_abs, ec); } + if (!aentries.empty()) { + // Map CPU monotonic ns -> Perfetto trace ns by sampling both now and using constant offset + uint64_t trace_now = perfetto::TrackEvent::GetTraceTimeNs(); +#if !defined(_WIN32) + timespec ts; clock_gettime(CLOCK_MONOTONIC, &ts); + uint64_t mono_now = uint64_t(ts.tv_sec) * 1000000000ull + uint64_t(ts.tv_nsec); + int64_t offset = (int64_t)trace_now - (int64_t)mono_now; +#else + // Windows: fallback to relative anchoring below + int64_t offset = 0; +#endif + auto gpu_track = perfetto::Track(0x47505551304ULL); + static std::atomic desc{false}; + bool expected = false; + if (desc.compare_exchange_strong(expected, true)) { + perfetto::protos::gen::TrackDescriptor td; td.set_name("GPU Queue 0"); + perfetto::TrackEvent::SetTrackDescriptor(gpu_track, td); + } + struct Ev { uint64_t ts; int delta; }; + std::vector evs; evs.reserve(aentries.size()*2); + for (const auto & e : aentries) { + uint64_t start_ns = (uint64_t)((int64_t)e.s_abs + offset); + uint64_t end_ns = (uint64_t)((int64_t)e.e_abs + offset); + TRACE_EVENT_BEGIN("GPU", perfetto::DynamicString(e.name.c_str()), gpu_track, start_ns); + TRACE_EVENT_END("GPU", gpu_track, end_ns); + evs.push_back({start_ns, +1}); + evs.push_back({end_ns, -1}); + } + if (!evs.empty()) { + std::sort(evs.begin(), evs.end(), [](const Ev &a, const Ev &b){ + if (a.ts != b.ts) return a.ts < b.ts; + return a.delta < b.delta; + }); + int active = 0; + for (const auto & ev : evs) { + if (ev.delta < 0) { active += ev.delta; if (active == 0) TRACE_COUNTER("GPU", "gpu_busy_percent", ev.ts, 0.0); } + else 
{ if (active == 0) TRACE_COUNTER("GPU", "gpu_busy_percent", ev.ts, 100.0); active += ev.delta; } + } + } + emitted = true; + } + } + } + } + + if (emitted) return; + + // Fallback: use relative timeline and anchor last-slice end to the fence-return time if available + std::string path = llama_perfetto_tmp_stats_path(); + if (path.size() >= 8) path.replace(path.size() - 7, 7, "vktimeline"); + if (!g_vk_dump_timeline(0, path.c_str())) return; + + struct Entry { std::string name; uint64_t s_rel; uint64_t e_rel; }; + std::vector entries; entries.reserve(256); + std::ifstream ifs(path); if (!ifs.good()) return; + std::string line; uint64_t total_span = 0; + while (std::getline(ifs, line)) { + size_t c1 = line.find(','); size_t c2 = line.find(',', c1 == std::string::npos ? 0 : c1 + 1); + if (c1 == std::string::npos || c2 == std::string::npos) continue; + uint64_t s = strtoull(line.substr(0, c1).c_str(), nullptr, 10); + uint64_t e = strtoull(line.substr(c1 + 1, c2 - c1 - 1).c_str(), nullptr, 10); + std::string name = line.substr(c2 + 1); + entries.push_back({std::move(name), s, e}); + if (e > total_span) total_span = e; + } + ifs.close(); { std::error_code ec; std::filesystem::remove(path, ec); } + if (entries.empty() || total_span == 0) return; + + // Try to anchor spans to the CPU time when the fence returned to avoid drawing after the wait + uint64_t anchor_trace = 0; +#if !defined(_WIN32) + if (g_vk_get_anchor_mono_ns) { + uint64_t anchor_mono = g_vk_get_anchor_mono_ns(0); + if (anchor_mono) { + uint64_t trace_now = perfetto::TrackEvent::GetTraceTimeNs(); + timespec ts; clock_gettime(CLOCK_MONOTONIC, &ts); + uint64_t mono_now = uint64_t(ts.tv_sec) * 1000000000ull + uint64_t(ts.tv_nsec); + int64_t offset = (int64_t)trace_now - (int64_t)mono_now; + anchor_trace = (uint64_t)((int64_t)anchor_mono + offset); + } + } +#endif + uint64_t now = anchor_trace ? 
anchor_trace : perfetto::TrackEvent::GetTraceTimeNs(); + auto gpu_track = perfetto::Track(0x47505551304ULL); + { + static std::atomic desc{false}; bool expected = false; + if (desc.compare_exchange_strong(expected, true)) { + perfetto::protos::gen::TrackDescriptor td; td.set_name("GPU Queue 0"); + perfetto::TrackEvent::SetTrackDescriptor(gpu_track, td); + } + } + struct Ev { uint64_t ts; int delta; }; + std::vector evs; evs.reserve(entries.size()*2); + for (const auto & e : entries) { + uint64_t start_ns = now - (total_span - e.s_rel); + uint64_t end_ns = now - (total_span - e.e_rel); + TRACE_EVENT_BEGIN("GPU", perfetto::DynamicString(e.name.c_str()), gpu_track, start_ns); + TRACE_EVENT_END("GPU", gpu_track, end_ns); + if (end_ns > start_ns) { evs.push_back({start_ns, +1}); evs.push_back({end_ns, -1}); } + } + if (!evs.empty()) { + std::sort(evs.begin(), evs.end(), [](const Ev &a, const Ev &b){ if (a.ts != b.ts) return a.ts < b.ts; return a.delta < b.delta; }); + int active = 0; for (const auto & ev : evs) { if (ev.delta < 0) { active += ev.delta; if (active == 0) TRACE_COUNTER("GPU", "gpu_busy_percent", ev.ts, 0.0); } else { if (active == 0) TRACE_COUNTER("GPU", "gpu_busy_percent", ev.ts, 100.0); active += ev.delta; } } + } +} + +extern "C" void llama_perfetto_try_start_from_env(void) { + const char * path = getenv("LLAMA_PERFETTO_TRACE"); + if (path && *path) { + llama_perfetto_start_trace(path); + return; + } + // Fallback: if LLAMA_PERFETTO is set (any value), write to default file + const char * on = getenv("LLAMA_PERFETTO"); + if (on && *on) { + llama_perfetto_start_trace("llama.perfetto-trace"); + } +} diff --git a/src/profiler/perfetto.cc b/src/profiler/perfetto.cc new file mode 100644 index 0000000000000..359f4f3ecf235 --- /dev/null +++ b/src/profiler/perfetto.cc @@ -0,0 +1,66917 @@ +// Copyright (C) 2019 The Android Open Source Project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in 
compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// This file is automatically generated by gen_amalgamated. Do not edit. + +// gen_amalgamated: predefined macros +#if !defined(PERFETTO_IMPLEMENTATION) +#define PERFETTO_IMPLEMENTATION +#endif +#include "perfetto.h" +// gen_amalgamated begin source: src/base/default_platform.cc +// gen_amalgamated begin header: include/perfetto/ext/base/platform.h +/* + * Copyright (C) 2023 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INCLUDE_PERFETTO_EXT_BASE_PLATFORM_H_ +#define INCLUDE_PERFETTO_EXT_BASE_PLATFORM_H_ + +namespace perfetto { +namespace base { +namespace platform { + +// Executed before entering a syscall (e.g. poll, read, write etc) which might +// block. +// This is overridden in Google internal builds for dealing with userspace +// scheduling. +void BeforeMaybeBlockingSyscall(); + +// Executed after entering a syscall (e.g. poll, read, write etc) which might +// block. 
+// This is overridden in Google internal builds for dealing with userspace +// scheduling. +void AfterMaybeBlockingSyscall(); + +} // namespace platform +} // namespace base +} // namespace perfetto + +#endif // INCLUDE_PERFETTO_EXT_BASE_PLATFORM_H_ +/* + * Copyright (C) 2023 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// gen_amalgamated expanded: #include "perfetto/ext/base/platform.h" + +namespace perfetto { +namespace base { +namespace platform { + +// This is a no-op outside of Google3 where we have some custom logic to deal +// with the userspace scheduler. +void BeforeMaybeBlockingSyscall() {} + +// This is a no-op outside of Google3 where we have some custom logic to deal +// with the userspace scheduler. +void AfterMaybeBlockingSyscall() {} + +} // namespace platform +} // namespace base +} // namespace perfetto +// gen_amalgamated begin source: src/base/android_utils.cc +// gen_amalgamated begin header: include/perfetto/ext/base/android_utils.h +/* + * Copyright (C) 2021 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INCLUDE_PERFETTO_EXT_BASE_ANDROID_UTILS_H_ +#define INCLUDE_PERFETTO_EXT_BASE_ANDROID_UTILS_H_ + +#include + +// gen_amalgamated expanded: #include "perfetto/base/build_config.h" + +namespace perfetto { +namespace base { + +#if PERFETTO_BUILDFLAG(PERFETTO_OS_ANDROID) + +// Returns the value of the Android system property named `name`. If the +// property does not exist, returns an empty string (a non-existing property is +// the same as a property with an empty value for this API). +std::string GetAndroidProp(const char* name); + +#endif // PERFETTO_BUILDFLAG(PERFETTO_OS_ANDROID) + +} // namespace base +} // namespace perfetto + +#endif // INCLUDE_PERFETTO_EXT_BASE_ANDROID_UTILS_H_ +/* + * Copyright (C) 2021 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +// gen_amalgamated expanded: #include "perfetto/ext/base/android_utils.h" + +// gen_amalgamated expanded: #include "perfetto/base/build_config.h" + +#include + +#if PERFETTO_BUILDFLAG(PERFETTO_OS_ANDROID) +#include +#endif + +// gen_amalgamated expanded: #include "perfetto/base/compiler.h" +// gen_amalgamated expanded: #include "perfetto/base/logging.h" + +namespace perfetto { +namespace base { + +#if PERFETTO_BUILDFLAG(PERFETTO_OS_ANDROID) + +std::string GetAndroidProp(const char* name) { + std::string ret; +#if __ANDROID_API__ >= 26 + const prop_info* pi = __system_property_find(name); + if (!pi) { + return ret; + } + __system_property_read_callback( + pi, + [](void* dst_void, const char*, const char* value, uint32_t) { + std::string& dst = *static_cast(dst_void); + dst = value; + }, + &ret); +#else // __ANDROID_API__ < 26 + char value_buf[PROP_VALUE_MAX]; + int len = __system_property_get(name, value_buf); + if (len > 0 && static_cast(len) < sizeof(value_buf)) { + ret = std::string(value_buf, static_cast(len)); + } +#endif + return ret; +} + +#endif // PERFETTO_BUILDFLAG(PERFETTO_OS_ANDROID) + +} // namespace base +} // namespace perfetto +// gen_amalgamated begin source: src/base/base64.cc +// gen_amalgamated begin header: include/perfetto/ext/base/base64.h +// gen_amalgamated begin header: include/perfetto/ext/base/string_view.h +// gen_amalgamated begin header: include/perfetto/ext/base/hash.h +/* + * Copyright (C) 2019 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INCLUDE_PERFETTO_EXT_BASE_HASH_H_ +#define INCLUDE_PERFETTO_EXT_BASE_HASH_H_ + +#include +#include +#include +#include +#include +#include + +namespace perfetto { +namespace base { + +// A helper class which computes a 64-bit hash of the input data. +// The algorithm used is FNV-1a as it is fast and easy to implement and has +// relatively few collisions. +// WARNING: This hash function should not be used for any cryptographic purpose. +class Hasher { + public: + // Creates an empty hash object + constexpr Hasher() = default; + + // Hashes a numeric value. + template < + typename T, + typename std::enable_if::value, bool>::type = true> + void Update(T data) { + Update(reinterpret_cast(&data), sizeof(data)); + } + + constexpr void Update(char c) { return Update(&c, 1); } + + // Using the loop instead of "Update(str, strlen(str))" to avoid looping twice + constexpr void Update(const char* str) { + for (const auto* p = str; *p; ++p) + Update(*p); + } + + // Hashes a byte array. + constexpr void Update(const char* data, size_t size) { + for (size_t i = 0; i < size; i++) { + result_ ^= static_cast(data[i]); + // Note: Arithmetic overflow of unsigned integers is well defined in C++ + // standard unlike signed integers. + // https://stackoverflow.com/a/41280273 + result_ *= kFnv1a64Prime; + } + } + + // Allow hashing anything that has `data` and `size` and has the kHashable + // trait (e.g., base::StringView). 
+ template > + constexpr void Update(const T& t) { + if constexpr (std::is_member_function_pointer_v) { + Update(t.data(), t.size()); + } else { + Update(t.data, t.size); + } + } + + constexpr void Update(std::string_view s) { Update(s.data(), s.size()); } + + constexpr uint64_t digest() const { return result_; } + + // Usage: + // uint64_t hashed_value = Hash::Combine(33, false, "ABC", 458L, 3u, 'x'); + template + static constexpr uint64_t Combine(Ts&&... args) { + Hasher hasher; + hasher.UpdateAll(std::forward(args)...); + return hasher.digest(); + } + + // Creates a hasher with `args` already hashed. + // + // Usage: + // Hasher partial = Hash::CreatePartial(33, false, "ABC", 458L); + template + static constexpr Hasher CreatePartial(Ts&&... args) { + Hasher hasher; + hasher.UpdateAll(std::forward(args)...); + return hasher; + } + + // `hasher.UpdateAll(33, false, "ABC")` is shorthand for: + // `hasher.Update(33); hasher.Update(false); hasher.Update("ABC");` + constexpr void UpdateAll() {} + + template + constexpr void UpdateAll(T&& arg, Ts&&... args) { + Update(arg); + UpdateAll(std::forward(args)...); + } + + private: + static constexpr uint64_t kFnv1a64OffsetBasis = 0xcbf29ce484222325; + static constexpr uint64_t kFnv1a64Prime = 0x100000001b3; + + uint64_t result_ = kFnv1a64OffsetBasis; +}; + +// This is for using already-hashed key into std::unordered_map and avoid the +// cost of re-hashing. Example: +// unordered_map my_map. +template +struct AlreadyHashed { + size_t operator()(const T& x) const { return static_cast(x); } +}; + +// base::Hash uses base::Hasher for integer values and falls base to std::hash +// for other types. This is needed as std::hash for integers is just the +// identity function and Perfetto uses open-addressing hash table, which are +// very sensitive to hash quality and are known to degrade in performance +// when using std::hash. +template +struct Hash { + // Version for ints, using base::Hasher. 
+ template + auto operator()(const U& x) -> + typename std::enable_if::value, size_t>::type + const { + Hasher hash; + hash.Update(x); + return static_cast(hash.digest()); + } + + // Version for non-ints, falling back to std::hash. + template + auto operator()(const U& x) -> + typename std::enable_if::value, size_t>::type + const { + return std::hash()(x); + } +}; + +} // namespace base +} // namespace perfetto + +#endif // INCLUDE_PERFETTO_EXT_BASE_HASH_H_ +/* + * Copyright (C) 2018 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INCLUDE_PERFETTO_EXT_BASE_STRING_VIEW_H_ +#define INCLUDE_PERFETTO_EXT_BASE_STRING_VIEW_H_ + +#include + +#include +#include +#include + +// gen_amalgamated expanded: #include "perfetto/base/build_config.h" +// gen_amalgamated expanded: #include "perfetto/base/logging.h" +// gen_amalgamated expanded: #include "perfetto/ext/base/hash.h" + +namespace perfetto { +namespace base { + +// A string-like object that refers to a non-owned piece of memory. +// Strings are internally NOT null terminated. +class StringView { + public: + // Allow hashing with base::Hash. 
+ static constexpr bool kHashable = true; + static constexpr size_t npos = static_cast(-1); + + StringView() : data_(nullptr), size_(0) {} + StringView(const StringView&) = default; + StringView& operator=(const StringView&) = default; + StringView(const char* data, size_t size) : data_(data), size_(size) { + PERFETTO_DCHECK(size == 0 || data != nullptr); + } + + // Allow implicit conversion from any class that has a |data| and |size| field + // and has the kConvertibleToStringView trait (e.g., protozero::ConstChars). + template > + StringView(const T& x) : StringView(x.data, x.size) { + PERFETTO_DCHECK(x.size == 0 || x.data != nullptr); + } + + // Creates a StringView from a null-terminated C string. + // Deliberately not "explicit". + StringView(const char* cstr) : data_(cstr), size_(strlen(cstr)) { + PERFETTO_DCHECK(cstr != nullptr); + } + + // This instead has to be explicit, as creating a StringView out of a + // std::string can be subtle. + explicit StringView(const std::string& str) + : data_(str.data()), size_(str.size()) {} + + bool empty() const { return size_ == 0; } + size_t size() const { return size_; } + const char* data() const { return data_; } + const char* begin() const { return data_; } + const char* end() const { return data_ + size_; } + + char at(size_t pos) const { + PERFETTO_DCHECK(pos < size_); + return data_[pos]; + } + + size_t find(char c, size_t start_pos = 0) const { + for (size_t i = start_pos; i < size_; ++i) { + if (data_[i] == c) + return i; + } + return npos; + } + + size_t find(const StringView& str, size_t start_pos = 0) const { + if (start_pos > size()) + return npos; + auto it = std::search(begin() + start_pos, end(), str.begin(), str.end()); + size_t pos = static_cast(it - begin()); + return pos + str.size() <= size() ? 
pos : npos; + } + + size_t find(const char* str, size_t start_pos = 0) const { + return find(StringView(str), start_pos); + } + + size_t rfind(char c) const { + for (size_t i = size_; i > 0; --i) { + if (data_[i - 1] == c) + return i - 1; + } + return npos; + } + + StringView substr(size_t pos, size_t count = npos) const { + if (pos >= size_) + return StringView("", 0); + size_t rcount = std::min(count, size_ - pos); + return StringView(data_ + pos, rcount); + } + + bool CaseInsensitiveEq(const StringView& other) const { + if (size() != other.size()) + return false; + if (size() == 0) + return true; +#if PERFETTO_BUILDFLAG(PERFETTO_OS_WIN) + return _strnicmp(data(), other.data(), size()) == 0; +#else + return strncasecmp(data(), other.data(), size()) == 0; +#endif + } + + bool CaseInsensitiveOneOf(const std::vector& others) const { + for (const StringView& other : others) { + if (CaseInsensitiveEq(other)) { + return true; + } + } + return false; + } + + bool StartsWith(const StringView& other) const { + if (other.size() == 0) + return true; + if (size() == 0) + return false; + if (other.size() > size()) + return false; + return memcmp(data(), other.data(), other.size()) == 0; + } + + bool EndsWith(const StringView& other) const { + if (other.size() == 0) + return true; + if (size() == 0) + return false; + if (other.size() > size()) + return false; + size_t off = size() - other.size(); + return memcmp(data() + off, other.data(), other.size()) == 0; + } + + std::string ToStdString() const { + return size_ == 0 ? 
"" : std::string(data_, size_); + } + + uint64_t Hash() const { + base::Hasher hasher; + hasher.Update(data_, size_); + return hasher.digest(); + } + + private: + const char* data_ = nullptr; + size_t size_ = 0; +}; + +inline bool operator==(const StringView& x, const StringView& y) { + if (x.size() != y.size()) + return false; + if (x.size() == 0) + return true; + return memcmp(x.data(), y.data(), x.size()) == 0; +} + +inline bool operator!=(const StringView& x, const StringView& y) { + return !(x == y); +} + +inline bool operator<(const StringView& x, const StringView& y) { + auto size = std::min(x.size(), y.size()); + if (size == 0) + return x.size() < y.size(); + int result = memcmp(x.data(), y.data(), size); + return result < 0 || (result == 0 && x.size() < y.size()); +} + +inline bool operator>=(const StringView& x, const StringView& y) { + return !(x < y); +} + +inline bool operator>(const StringView& x, const StringView& y) { + return y < x; +} + +inline bool operator<=(const StringView& x, const StringView& y) { + return !(y < x); +} + +} // namespace base +} // namespace perfetto + +template <> +struct std::hash<::perfetto::base::StringView> { + size_t operator()(const ::perfetto::base::StringView& sv) const { + return static_cast(sv.Hash()); + } +}; + +#endif // INCLUDE_PERFETTO_EXT_BASE_STRING_VIEW_H_ +// gen_amalgamated begin header: include/perfetto/ext/base/utils.h +// gen_amalgamated begin header: include/perfetto/ext/base/sys_types.h +/* + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INCLUDE_PERFETTO_EXT_BASE_SYS_TYPES_H_ +#define INCLUDE_PERFETTO_EXT_BASE_SYS_TYPES_H_ + +// This headers deals with sys types commonly used in the codebase that are +// missing on Windows. + +#include // IWYU pragma: export +#include + +// gen_amalgamated expanded: #include "perfetto/base/build_config.h" + +#if PERFETTO_BUILDFLAG(PERFETTO_OS_WIN) + +#if !PERFETTO_BUILDFLAG(PERFETTO_COMPILER_GCC) +// MinGW has these. clang-cl and MSVC, which use just the Windows SDK, don't. +using uid_t = int; +using pid_t = int; +#endif // !GCC + +#if defined(_WIN64) +using ssize_t = int64_t; +#else +using ssize_t = long; +#endif // _WIN64 + +#endif // OS_WIN + +#if PERFETTO_BUILDFLAG(PERFETTO_OS_ANDROID) && !defined(AID_SHELL) +// From libcutils' android_filesystem_config.h . +#define AID_SHELL 2000 +#endif + +namespace perfetto { +namespace base { + +// The machine ID used in the tracing core. +using MachineID = uint32_t; +// The default value reserved for the host trace. +constexpr MachineID kDefaultMachineID = 0; + +constexpr uid_t kInvalidUid = static_cast(-1); +constexpr pid_t kInvalidPid = static_cast(-1); + +} // namespace base +} // namespace perfetto + +#endif // INCLUDE_PERFETTO_EXT_BASE_SYS_TYPES_H_ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef INCLUDE_PERFETTO_EXT_BASE_UTILS_H_ +#define INCLUDE_PERFETTO_EXT_BASE_UTILS_H_ + +#include +#include +#include +#include + +#include +#include +#include +#include + +// gen_amalgamated expanded: #include "perfetto/base/build_config.h" +// gen_amalgamated expanded: #include "perfetto/base/compiler.h" +// gen_amalgamated expanded: #include "perfetto/ext/base/sys_types.h" + +#if PERFETTO_BUILDFLAG(PERFETTO_OS_WIN) +// Even if Windows has errno.h, the all syscall-restart behavior does not apply. +// Trying to handle EINTR can cause more harm than good if errno is left stale. +// Chromium does the same. +#define PERFETTO_EINTR(x) (x) +#else +#define PERFETTO_EINTR(x) \ + ([&] { \ + decltype(x) eintr_wrapper_result; \ + do { \ + eintr_wrapper_result = (x); \ + } while (eintr_wrapper_result == -1 && errno == EINTR); \ + return eintr_wrapper_result; \ + }()) +#endif + +namespace perfetto { +namespace base { + +namespace internal { +extern std::atomic g_cached_page_size; +uint32_t GetSysPageSizeSlowpath(); +} // namespace internal + +// Returns the system's page size. Use this when dealing with mmap, madvise and +// similar mm-related syscalls. +// This function might be called in hot paths. Avoid calling getpagesize() all +// the times, in many implementations getpagesize() calls sysconf() which is +// not cheap. +inline uint32_t GetSysPageSize() { + const uint32_t page_size = + internal::g_cached_page_size.load(std::memory_order_relaxed); + return page_size != 0 ? page_size : internal::GetSysPageSizeSlowpath(); +} + +template +constexpr size_t ArraySize(const T (&)[TSize]) { + return TSize; +} + +// Function object which invokes 'free' on its parameter, which must be +// a pointer. 
Can be used to store malloc-allocated pointers in std::unique_ptr: +// +// std::unique_ptr foo_ptr( +// static_cast(malloc(sizeof(int)))); +struct FreeDeleter { + inline void operator()(void* ptr) const { free(ptr); } +}; + +template +constexpr T AssumeLittleEndian(T value) { +#if !PERFETTO_IS_LITTLE_ENDIAN() + static_assert(false, "Unimplemented on big-endian archs"); +#endif + return value; +} + +// Round up |size| to a multiple of |alignment| (must be a power of two). +inline constexpr size_t AlignUp(size_t size, size_t alignment) { + return (size + alignment - 1) & ~(alignment - 1); +} + +// TODO(primiano): clean this up and move all existing usages to the constexpr +// version above. +template +constexpr size_t AlignUp(size_t size) { + static_assert((alignment & (alignment - 1)) == 0, "alignment must be a pow2"); + return AlignUp(size, alignment); +} + +inline bool IsAgain(int err) { + return err == EAGAIN || err == EWOULDBLOCK; +} + +// setenv(2)-equivalent. Deals with Windows vs Posix discrepancies. +void SetEnv(const std::string& key, const std::string& value); + +// unsetenv(2)-equivalent. Deals with Windows vs Posix discrepancies. +void UnsetEnv(const std::string& key); + +// Calls mallopt(M_PURGE, 0) on Android. Does nothing on other platforms. +// This forces the allocator to release freed memory. This is used to work +// around various Scudo inefficiencies. See b/170217718. +void MaybeReleaseAllocatorMemToOS(); + +// geteuid() on POSIX OSes, returns 0 on Windows (See comment in utils.cc). +uid_t GetCurrentUserId(); + +// Forks the process. +// Parent: prints the PID of the child, calls |parent_cb| and exits from the +// process with its return value. +// Child: redirects stdio onto /dev/null, chdirs into / and returns. +void Daemonize(std::function parent_cb); + +// Returns the path of the current executable, e.g. /foo/bar/exe. +std::string GetCurExecutablePath(); + +// Returns the directory where the current executable lives in, e.g. /foo/bar. 
+// This is independent of cwd(). +std::string GetCurExecutableDir(); + +// Memory returned by AlignedAlloc() must be freed via AlignedFree() not just +// free. It makes a difference on Windows where _aligned_malloc() and +// _aligned_free() must be paired. +// Prefer using the AlignedAllocTyped() below which takes care of the pairing. +void* AlignedAlloc(size_t alignment, size_t size); +void AlignedFree(void*); + +// Detects Sync-mode MTE (currently being tested in some Android builds). +// This is known to use extra memory for the stack history buffer. +bool IsSyncMemoryTaggingEnabled(); + +// A RAII version of the above, which takes care of pairing Aligned{Alloc,Free}. +template +struct AlignedDeleter { + inline void operator()(T* ptr) const { AlignedFree(ptr); } +}; + +// The remove_extent here and below is to allow defining unique_ptr. +// As per https://en.cppreference.com/w/cpp/memory/unique_ptr the Deleter takes +// always a T*, not a T[]*. +template +using AlignedUniquePtr = + std::unique_ptr::type>>; + +template +AlignedUniquePtr AlignedAllocTyped(size_t n_membs) { + using TU = typename std::remove_extent::type; + return AlignedUniquePtr( + static_cast(AlignedAlloc(alignof(TU), sizeof(TU) * n_membs))); +} + +// A RAII wrapper to invoke a function when leaving a function/scope. +template +class OnScopeExitWrapper { + public: + explicit OnScopeExitWrapper(Func f) : f_(std::move(f)), active_(true) {} + OnScopeExitWrapper(OnScopeExitWrapper&& other) noexcept + : f_(std::move(other.f_)), active_(other.active_) { + other.active_ = false; + } + ~OnScopeExitWrapper() { + if (active_) + f_(); + } + + private: + Func f_; + bool active_; +}; + +template +PERFETTO_WARN_UNUSED_RESULT OnScopeExitWrapper OnScopeExit(Func f) { + return OnScopeExitWrapper(std::move(f)); +} + +// Returns a xxd-style hex dump (hex + ascii chars) of the input data. 
+std::string HexDump(const void* data, size_t len, size_t bytes_per_line = 16); +inline std::string HexDump(const std::string& data, + size_t bytes_per_line = 16) { + return HexDump(data.data(), data.size(), bytes_per_line); +} + +} // namespace base +} // namespace perfetto + +#endif // INCLUDE_PERFETTO_EXT_BASE_UTILS_H_ +/* + * Copyright (C) 2021 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INCLUDE_PERFETTO_EXT_BASE_BASE64_H_ +#define INCLUDE_PERFETTO_EXT_BASE_BASE64_H_ + +#include +#include + +// gen_amalgamated expanded: #include "perfetto/ext/base/string_view.h" +// gen_amalgamated expanded: #include "perfetto/ext/base/utils.h" // For ssize_t. + +namespace perfetto { +namespace base { + +// Returns the length of the destination string (included '=' padding). +// Does NOT include the size of the string null terminator. +inline size_t Base64EncSize(size_t src_size) { + return (src_size + 2) / 3 * 4; +} + +// Returns the upper bound on the length of the destination buffer. +// The actual decoded length might be <= the number returned here. +inline size_t Base64DecSize(size_t src_size) { + return (src_size + 3) / 4 * 3; +} + +// Does NOT null-terminate |dst|. 
+ssize_t Base64Encode(const void* src, + size_t src_size, + char* dst, + size_t dst_size); + +std::string Base64Encode(const void* src, size_t src_size); + +inline std::string Base64Encode(StringView sv) { + return Base64Encode(sv.data(), sv.size()); +} + +// Returns -1 in case of failure. +ssize_t Base64Decode(const char* src, + size_t src_size, + uint8_t* dst, + size_t dst_size); + +std::optional Base64Decode(const char* src, size_t src_size); + +inline std::optional Base64Decode(StringView sv) { + return Base64Decode(sv.data(), sv.size()); +} + +} // namespace base +} // namespace perfetto + +#endif // INCLUDE_PERFETTO_EXT_BASE_BASE64_H_ +/* + * Copyright (C) 2021 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// gen_amalgamated expanded: #include "perfetto/ext/base/base64.h" + +namespace perfetto { +namespace base { + +namespace { + +constexpr char kPadding = '='; + +constexpr char kEncTable[] = + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; +static_assert(sizeof(kEncTable) == (1u << 6) + sizeof('\0'), "Bad table size"); + +// Maps an ASCII character to its 6-bit value. It only contains translations +// from '+' to 'z'. Supports the standard (+/) and URL-safe (-_) alphabets. 
+constexpr uint8_t kX = 0xff; // Value used for invalid characters +constexpr uint8_t kDecTable[] = { + 62, kX, 62, kX, 63, 52, 53, 54, 55, 56, // 00 - 09 + 57, 58, 59, 60, 61, kX, kX, kX, 0, kX, // 10 - 19 + kX, kX, 0, 1, 2, 3, 4, 5, 6, 7, // 20 - 29 + 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, // 30 - 39 + 18, 19, 20, 21, 22, 23, 24, 25, kX, kX, // 40 - 49 + kX, kX, 63, kX, 26, 27, 28, 29, 30, 31, // 50 - 59 + 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, // 60 - 69 + 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, // 70 - 79 +}; +constexpr char kMinDecChar = '+'; +constexpr char kMaxDecChar = 'z'; +static_assert(kMaxDecChar - kMinDecChar <= sizeof(kDecTable), "Bad table size"); + +inline uint8_t DecodeChar(char c) { + if (c < kMinDecChar || c > kMaxDecChar) + return kX; + return kDecTable[c - kMinDecChar]; +} + +} // namespace + +ssize_t Base64Encode(const void* src, + size_t src_size, + char* dst, + size_t dst_size) { + const size_t padded_dst_size = Base64EncSize(src_size); + if (dst_size < padded_dst_size) + return -1; // Not enough space in output. 
+ + const uint8_t* rd = static_cast(src); + const uint8_t* const end = rd + src_size; + size_t wr_size = 0; + while (rd < end) { + uint8_t s[3]{}; + s[0] = *(rd++); + dst[wr_size++] = kEncTable[s[0] >> 2]; + + uint8_t carry0 = static_cast((s[0] & 0x03) << 4); + if (PERFETTO_LIKELY(rd < end)) { + s[1] = *(rd++); + dst[wr_size++] = kEncTable[carry0 | (s[1] >> 4)]; + } else { + dst[wr_size++] = kEncTable[carry0]; + dst[wr_size++] = kPadding; + dst[wr_size++] = kPadding; + break; + } + + uint8_t carry1 = static_cast((s[1] & 0x0f) << 2); + if (PERFETTO_LIKELY(rd < end)) { + s[2] = *(rd++); + dst[wr_size++] = kEncTable[carry1 | (s[2] >> 6)]; + } else { + dst[wr_size++] = kEncTable[carry1]; + dst[wr_size++] = kPadding; + break; + } + + dst[wr_size++] = kEncTable[s[2] & 0x3f]; + } + PERFETTO_DCHECK(wr_size == padded_dst_size); + return static_cast(padded_dst_size); +} + +std::string Base64Encode(const void* src, size_t src_size) { + std::string dst; + dst.resize(Base64EncSize(src_size)); + auto res = Base64Encode(src, src_size, &dst[0], dst.size()); + PERFETTO_CHECK(res == static_cast(dst.size())); + return dst; +} + +ssize_t Base64Decode(const char* src, + size_t src_size, + uint8_t* dst, + size_t dst_size) { + const size_t min_dst_size = Base64DecSize(src_size); + if (dst_size < min_dst_size) + return -1; + + const char* rd = src; + const char* const end = src + src_size; + size_t wr_size = 0; + + char s[4]{}; + while (rd < end) { + uint8_t d[4]; + for (uint32_t j = 0; j < 4; j++) { + // Padding is only feasible for the last 2 chars of each group of 4. + s[j] = rd < end ? *(rd++) : (j < 2 ? '\0' : kPadding); + d[j] = DecodeChar(s[j]); + if (d[j] == kX) + return -1; // Invalid input char. + } + dst[wr_size] = static_cast((d[0] << 2) | (d[1] >> 4)); + dst[wr_size + 1] = static_cast((d[1] << 4) | (d[2] >> 2)); + dst[wr_size + 2] = static_cast((d[2] << 6) | (d[3])); + wr_size += 3; + } + + PERFETTO_CHECK(wr_size <= dst_size); + wr_size -= (s[3] == kPadding ? 
1 : 0) + (s[2] == kPadding ? 1 : 0); + return static_cast(wr_size); +} + +std::optional Base64Decode(const char* src, size_t src_size) { + std::string dst; + dst.resize(Base64DecSize(src_size)); + auto res = Base64Decode(src, src_size, reinterpret_cast(&dst[0]), + dst.size()); + if (res < 0) + return std::nullopt; // Decoding error. + + PERFETTO_CHECK(res <= static_cast(dst.size())); + dst.resize(static_cast(res)); + return std::make_optional(dst); +} + +} // namespace base +} // namespace perfetto +// gen_amalgamated begin source: src/base/crash_keys.cc +// gen_amalgamated begin header: include/perfetto/ext/base/crash_keys.h +/* + * Copyright (C) 2021 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INCLUDE_PERFETTO_EXT_BASE_CRASH_KEYS_H_ +#define INCLUDE_PERFETTO_EXT_BASE_CRASH_KEYS_H_ + +#include +#include + +#include +#include + +// gen_amalgamated expanded: #include "perfetto/base/compiler.h" +// gen_amalgamated expanded: #include "perfetto/ext/base/string_view.h" + +// Crash keys are very simple global variables with static-storage that +// are reported on crash time for managed crashes (CHECK/FATAL/Watchdog). +// - Translation units can define a CrashKey and register it at some point +// during initialization. +// - CrashKey instances must be long-lived. They should really be just global +// static variable in the anonymous namespace. 
+// Example: +// subsystem_1.cc +// CrashKey g_client_id("ipc_client_id"); +// ... +// OnIpcReceived(client_id) { +// g_client_id.Set(client_id); +// ... // Process the IPC +// g_client_id.Clear(); +// } +// Or equivalently: +// OnIpcReceived(client_id) { +// auto scoped_key = g_client_id.SetScoped(client_id); +// ... // Process the IPC +// } +// +// If a crash happens while processing the IPC, the crash report will +// have a line "ipc_client_id: 42". +// +// Thread safety considerations: +// CrashKeys can be registered and set/cleared from any thread. +// There is no compelling use-case to have full acquire/release consistency when +// setting a key. This means that if a thread crashes immediately after a +// crash key has been set on another thread, the value printed on the crash +// report could be incomplete. The code guarantees defined behavior and does +// not rely on null-terminated string (in the worst case 32 bytes of random +// garbage will be printed out). + +// The tests live in logging_unittest.cc. + +namespace perfetto { +namespace base { + +constexpr size_t kCrashKeyMaxStrSize = 32; + +// CrashKey instances must be long lived +class CrashKey { + public: + class ScopedClear { + public: + explicit ScopedClear(CrashKey* k) : key_(k) {} + ~ScopedClear() { + if (key_) + key_->Clear(); + } + ScopedClear(const ScopedClear&) = delete; + ScopedClear& operator=(const ScopedClear&) = delete; + ScopedClear& operator=(ScopedClear&&) = delete; + ScopedClear(ScopedClear&& other) noexcept : key_(other.key_) { + other.key_ = nullptr; + } + + private: + CrashKey* key_; + }; + + // constexpr so it can be used in the anon namespace without requiring a + // global constructor. + // |name| must be a long-lived string. 
+ constexpr explicit CrashKey(const char* name) + : registered_{}, type_(Type::kUnset), name_(name), str_value_{} {} + CrashKey(const CrashKey&) = delete; + CrashKey& operator=(const CrashKey&) = delete; + CrashKey(CrashKey&&) = delete; + CrashKey& operator=(CrashKey&&) = delete; + + enum class Type : uint8_t { kUnset = 0, kInt, kStr }; + + void Clear() { + int_value_.store(0, std::memory_order_relaxed); + type_.store(Type::kUnset, std::memory_order_relaxed); + } + + void Set(int64_t value) { + int_value_.store(value, std::memory_order_relaxed); + type_.store(Type::kInt, std::memory_order_relaxed); + if (PERFETTO_UNLIKELY(!registered_.load(std::memory_order_relaxed))) + Register(); + } + + void Set(StringView sv) { + size_t len = std::min(sv.size(), sizeof(str_value_) - 1); + for (size_t i = 0; i < len; ++i) + str_value_[i].store(sv.data()[i], std::memory_order_relaxed); + str_value_[len].store('\0', std::memory_order_relaxed); + type_.store(Type::kStr, std::memory_order_relaxed); + if (PERFETTO_UNLIKELY(!registered_.load(std::memory_order_relaxed))) + Register(); + } + + ScopedClear SetScoped(int64_t value) PERFETTO_WARN_UNUSED_RESULT { + Set(value); + return ScopedClear(this); + } + + ScopedClear SetScoped(StringView sv) PERFETTO_WARN_UNUSED_RESULT { + Set(sv); + return ScopedClear(this); + } + + void Register(); + + int64_t int_value() const { + return int_value_.load(std::memory_order_relaxed); + } + size_t ToString(char* dst, size_t len); + + private: + std::atomic registered_; + std::atomic type_; + const char* const name_; + union { + std::atomic str_value_[kCrashKeyMaxStrSize]; + std::atomic int_value_; + }; +}; + +// Fills |dst| with a string containing one line for each crash key +// (excluding the unset ones). +// Returns number of chars written, without counting the NUL terminator. +// This is used in logging.cc when emitting the crash report abort message. 
+size_t SerializeCrashKeys(char* dst, size_t len); + +void UnregisterAllCrashKeysForTesting(); + +} // namespace base +} // namespace perfetto + +#endif // INCLUDE_PERFETTO_EXT_BASE_CRASH_KEYS_H_ +// gen_amalgamated begin header: include/perfetto/ext/base/string_utils.h +/* + * Copyright (C) 2018 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INCLUDE_PERFETTO_EXT_BASE_STRING_UTILS_H_ +#define INCLUDE_PERFETTO_EXT_BASE_STRING_UTILS_H_ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +// gen_amalgamated expanded: #include "perfetto/ext/base/string_view.h" + +namespace perfetto { +namespace base { + +inline char Lowercase(char c) { + return ('A' <= c && c <= 'Z') ? static_cast(c - ('A' - 'a')) : c; +} + +inline char Uppercase(char c) { + return ('a' <= c && c <= 'z') ? static_cast(c + ('A' - 'a')) : c; +} + +inline std::optional CStringToUInt32(const char* s, int base = 10) { + char* endptr = nullptr; + auto value = static_cast(strtoul(s, &endptr, base)); + return (*s && !*endptr) ? std::make_optional(value) : std::nullopt; +} + +inline std::optional CStringToInt32(const char* s, int base = 10) { + char* endptr = nullptr; + auto value = static_cast(strtol(s, &endptr, base)); + return (*s && !*endptr) ? std::make_optional(value) : std::nullopt; +} + +// Note: it saturates to 7fffffffffffffff if parsing a hex number >= 0x8000... 
+inline std::optional CStringToInt64(const char* s, int base = 10) { + char* endptr = nullptr; + auto value = static_cast(strtoll(s, &endptr, base)); + return (*s && !*endptr) ? std::make_optional(value) : std::nullopt; +} + +inline std::optional CStringToUInt64(const char* s, int base = 10) { + char* endptr = nullptr; + auto value = static_cast(strtoull(s, &endptr, base)); + return (*s && !*endptr) ? std::make_optional(value) : std::nullopt; +} + +double StrToD(const char* nptr, char** endptr); + +inline std::optional CStringToDouble(const char* s) { + char* endptr = nullptr; + double value = StrToD(s, &endptr); + std::optional result(std::nullopt); + if (*s != '\0' && *endptr == '\0') + result = value; + return result; +} + +inline std::optional StringToUInt32(const std::string& s, + int base = 10) { + return CStringToUInt32(s.c_str(), base); +} + +inline std::optional StringToInt32(const std::string& s, + int base = 10) { + return CStringToInt32(s.c_str(), base); +} + +inline std::optional StringToUInt64(const std::string& s, + int base = 10) { + return CStringToUInt64(s.c_str(), base); +} + +inline std::optional StringToInt64(const std::string& s, + int base = 10) { + return CStringToInt64(s.c_str(), base); +} + +inline std::optional StringToDouble(const std::string& s) { + return CStringToDouble(s.c_str()); +} + +template +inline std::optional StringViewToNumber(const base::StringView& sv, + int base = 10) { + // std::from_chars() does not regonize the leading '+' character and only + // recognizes '-' so remove the '+' if it exists to avoid errors and match + // the behavior of the other string conversion utilities above. + size_t start_offset = !sv.empty() && sv.at(0) == '+' ? 
1 : 0; + T value; + auto result = + std::from_chars(sv.begin() + start_offset, sv.end(), value, base); + if (result.ec == std::errc() && result.ptr == sv.end()) { + return value; + } else { + return std::nullopt; + } +} + +inline std::optional StringViewToUInt32(const base::StringView& sv, + int base = 10) { + // std::from_chars() does not recognize the leading '-' character for + // unsigned conversions, but strtol does. To Mimic the behavior of strtol, + // attempt a signed converion if we see a leading '-', and then cast the + // result back to unsigned. + if (sv.size() > 0 && sv.at(0) == '-') { + return static_cast >( + StringViewToNumber(sv, base)); + } else { + return StringViewToNumber(sv, base); + } +} + +inline std::optional StringViewToInt32(const base::StringView& sv, + int base = 10) { + return StringViewToNumber(sv, base); +} + +inline std::optional StringViewToUInt64(const base::StringView& sv, + int base = 10) { + // std::from_chars() does not recognize the leading '-' character for + // unsigned conversions, but strtol does. To Mimic the behavior of strtol, + // attempt a signed converion if we see a leading '-', and then cast the + // result back to unsigned. + if (sv.size() > 0 && sv.at(0) == '-') { + return static_cast >( + StringViewToNumber(sv, base)); + } else { + return StringViewToNumber(sv, base); + } +} + +inline std::optional StringViewToInt64(const base::StringView& sv, + int base = 10) { + return StringViewToNumber(sv, base); +} + +// TODO: As of Clang 19.0 std::from_chars is unimplemented for type double +// despite being part of C++17 standard, and already being supported by GCC and +// MSVC. Enable this once we have double support in Clang. 
+// inline std::optional StringViewToDouble(const base::StringView& sv) { +// return StringViewToNumber(sv); +// } + +bool StartsWith(const std::string& str, const std::string& prefix); +bool EndsWith(const std::string& str, const std::string& suffix); +bool StartsWithAny(const std::string& str, + const std::vector& prefixes); +bool Contains(const std::string& haystack, const std::string& needle); +bool Contains(const std::string& haystack, char needle); +size_t Find(const StringView& needle, const StringView& haystack); +bool CaseInsensitiveEqual(const std::string& first, const std::string& second); +std::string Join(const std::vector& parts, + const std::string& delim); +std::vector SplitString(const std::string& text, + const std::string& delimiter); +std::string StripPrefix(const std::string& str, const std::string& prefix); +std::string StripSuffix(const std::string& str, const std::string& suffix); +std::string TrimWhitespace(const std::string& str); +std::string ToLower(const std::string& str); +std::string ToUpper(const std::string& str); +std::string StripChars(const std::string& str, + const std::string& chars, + char replacement); +std::string ToHex(const char* data, size_t size); +inline std::string ToHex(const std::string& s) { + return ToHex(s.c_str(), s.size()); +} +std::string IntToHexString(uint32_t number); +std::string Uint64ToHexString(uint64_t number); +std::string Uint64ToHexStringNoPrefix(uint64_t number); +std::string ReplaceAll(std::string str, + const std::string& to_replace, + const std::string& replacement); + +#if PERFETTO_BUILDFLAG(PERFETTO_OS_WIN) +bool WideToUTF8(const std::wstring& source, std::string& output); +bool UTF8ToWide(const std::string& source, std::wstring& output); +#endif // PERFETTO_BUILDFLAG(PERFETTO_OS_WIN) + +// A BSD-style strlcpy without the return value. +// Copies at most |dst_size|-1 characters. Unlike strncpy, it always \0 +// terminates |dst|, as long as |dst_size| is not 0. 
+// Unlike strncpy and like strlcpy it does not zero-pad the rest of |dst|. +// Returns nothing. The BSD strlcpy returns the size of |src|, which might +// be > |dst_size|. Anecdotal experience suggests people assume the return value +// is the number of bytes written in |dst|. That assumption can lead to +// dangerous bugs. +// In order to avoid being subtly uncompliant with strlcpy AND avoid misuse, +// the choice here is to return nothing. +inline void StringCopy(char* dst, const char* src, size_t dst_size) { + for (size_t i = 0; i < dst_size; ++i) { + if ((dst[i] = src[i]) == '\0') { + return; // We hit and copied the null terminator. + } + } + + // We were left off at dst_size. We over copied 1 byte. Null terminate. + if (PERFETTO_LIKELY(dst_size > 0)) + dst[dst_size - 1] = 0; +} + +// Like snprintf() but returns the number of chars *actually* written (without +// counting the null terminator) NOT "the number of chars which would have been +// written to the final string if enough space had been available". +// This should be used in almost all cases when the caller uses the return value +// of snprintf(). If the return value is not used, there is no benefit in using +// this wrapper, as this just calls snprintf() and mangles the return value. +// It always null-terminates |dst| (even in case of errors), unless +// |dst_size| == 0. +// Examples: +// SprintfTrunc(x, 4, "123whatever"): returns 3 and writes "123\0". +// SprintfTrunc(x, 4, "123"): returns 3 and writes "123\0". +// SprintfTrunc(x, 3, "123"): returns 2 and writes "12\0". +// SprintfTrunc(x, 2, "123"): returns 1 and writes "1\0". +// SprintfTrunc(x, 1, "123"): returns 0 and writes "\0". +// SprintfTrunc(x, 0, "123"): returns 0 and writes nothing. +// NOTE: This means that the caller has no way to tell when truncation happens +// vs the edge case of *just* fitting in the buffer. +size_t SprintfTrunc(char* dst, size_t dst_size, const char* fmt, ...) 
+ PERFETTO_PRINTF_FORMAT(3, 4); + +// Line number starts from 1 +struct LineWithOffset { + base::StringView line; + uint32_t line_offset; + uint32_t line_num; +}; + +// For given string and offset Pfinds a line with character for +// which offset points, what number is this line (starts from 1), and the offset +// inside this line. returns std::nullopt if the offset points to +// line break character or exceeds string length. +std::optional FindLineWithOffset(base::StringView str, + uint32_t offset); + +// A helper class to facilitate construction and usage of write-once stack +// strings. +// Example usage: +// StackString<32> x("format %d %s", 42, string_arg); +// TakeString(x.c_str() | x.string_view() | x.ToStdString()); +// Rather than char x[32] + sprintf. +// Advantages: +// - Avoids useless zero-fills caused by people doing `char buf[32] {}` (mainly +// by fearing unknown snprintf failure modes). +// - Makes the code more robust in case of snprintf truncations (len() and +// string_view() will return the truncated length, unlike snprintf). +template +class StackString { + public: + explicit PERFETTO_PRINTF_FORMAT(/* 1=this */ 2, 3) + StackString(const char* fmt, ...) { + buf_[0] = '\0'; + va_list args; + va_start(args, fmt); + int res = vsnprintf(buf_, sizeof(buf_), fmt, args); + va_end(args); + buf_[sizeof(buf_) - 1] = '\0'; + len_ = res < 0 ? 0 : std::min(static_cast(res), sizeof(buf_) - 1); + } + + StringView string_view() const { return StringView(buf_, len_); } + std::string ToStdString() const { return std::string(buf_, len_); } + const char* c_str() const { return buf_; } + size_t len() const { return len_; } + char* mutable_data() { return buf_; } + + private: + char buf_[N]; + size_t len_ = 0; // Does not include the \0. 
+}; + +} // namespace base +} // namespace perfetto + +#endif // INCLUDE_PERFETTO_EXT_BASE_STRING_UTILS_H_ +/* + * Copyright (C) 2021 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// gen_amalgamated expanded: #include "perfetto/ext/base/crash_keys.h" + +#include + +#include +#include + +// gen_amalgamated expanded: #include "perfetto/ext/base/string_utils.h" + +namespace perfetto { +namespace base { + +namespace { + +constexpr size_t kMaxKeys = 32; + +std::atomic g_keys[kMaxKeys]{}; +std::atomic g_num_keys{}; +} // namespace + +void CrashKey::Register() { + // If doesn't matter if we fail below. If there are no slots left, don't + // keep trying re-registering on every Set(), the outcome won't change. + + // If two threads raced on the Register(), avoid registering the key twice. + if (registered_.exchange(true)) + return; + + uint32_t slot = g_num_keys.fetch_add(1); + if (slot >= kMaxKeys) { + PERFETTO_LOG("Too many crash keys registered"); + return; + } + g_keys[slot].store(this); +} + +// Returns the number of chars written, without counting the \0. 
+size_t CrashKey::ToString(char* dst, size_t len) { + if (len > 0) + *dst = '\0'; + switch (type_.load(std::memory_order_relaxed)) { + case Type::kUnset: + break; + case Type::kInt: + return SprintfTrunc(dst, len, "%s: %" PRId64 "\n", name_, + int_value_.load(std::memory_order_relaxed)); + case Type::kStr: + char buf[sizeof(str_value_)]; + for (size_t i = 0; i < sizeof(str_value_); i++) + buf[i] = str_value_[i].load(std::memory_order_relaxed); + + // Don't assume |str_value_| is properly null-terminated. + return SprintfTrunc(dst, len, "%s: %.*s\n", name_, int(sizeof(buf)), buf); + } + return 0; +} + +void UnregisterAllCrashKeysForTesting() { + g_num_keys.store(0); + for (auto& key : g_keys) + key.store(nullptr); +} + +size_t SerializeCrashKeys(char* dst, size_t len) { + size_t written = 0; + uint32_t num_keys = g_num_keys.load(); + if (len > 0) + *dst = '\0'; + for (uint32_t i = 0; i < num_keys && written < len; i++) { + CrashKey* key = g_keys[i].load(); + if (!key) + continue; // Can happen if we hit this between the add and the store. + written += key->ToString(dst + written, len - written); + } + PERFETTO_DCHECK(written <= len); + PERFETTO_DCHECK(len == 0 || dst[written] == '\0'); + return written; +} + +} // namespace base +} // namespace perfetto +// gen_amalgamated begin source: src/base/ctrl_c_handler.cc +// gen_amalgamated begin header: include/perfetto/ext/base/ctrl_c_handler.h +/* + * Copyright (C) 2021 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INCLUDE_PERFETTO_EXT_BASE_CTRL_C_HANDLER_H_ +#define INCLUDE_PERFETTO_EXT_BASE_CTRL_C_HANDLER_H_ + +namespace perfetto { +namespace base { + +// On Linux/Android/Mac: installs SIGINT + SIGTERM signal handlers. +// On Windows: installs a SetConsoleCtrlHandler() handler. +// The passed handler must be async safe. +using CtrlCHandlerFunction = void (*)(); +void InstallCtrlCHandler(CtrlCHandlerFunction); + +} // namespace base +} // namespace perfetto + +#endif // INCLUDE_PERFETTO_EXT_BASE_CTRL_C_HANDLER_H_ +/* + * Copyright (C) 2021 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +// gen_amalgamated expanded: #include "perfetto/ext/base/ctrl_c_handler.h" + +// gen_amalgamated expanded: #include "perfetto/base/build_config.h" +// gen_amalgamated expanded: #include "perfetto/base/compiler.h" +// gen_amalgamated expanded: #include "perfetto/base/logging.h" + +#if PERFETTO_BUILDFLAG(PERFETTO_OS_WIN) +#include +#include +#else +#include +#include +#endif + +namespace perfetto { +namespace base { + +namespace { +CtrlCHandlerFunction g_handler = nullptr; +} + +void InstallCtrlCHandler(CtrlCHandlerFunction handler) { + PERFETTO_CHECK(g_handler == nullptr); + g_handler = handler; + +#if PERFETTO_BUILDFLAG(PERFETTO_OS_WIN) + auto trampoline = [](DWORD type) -> int { + if (type == CTRL_C_EVENT) { + g_handler(); + return true; + } + return false; + }; + ::SetConsoleCtrlHandler(trampoline, true); +#elif PERFETTO_BUILDFLAG(PERFETTO_OS_LINUX) || \ + PERFETTO_BUILDFLAG(PERFETTO_OS_ANDROID) || \ + PERFETTO_BUILDFLAG(PERFETTO_OS_APPLE) + // Setup signal handler. + struct sigaction sa {}; + +// Glibc headers for sa_sigaction trigger this. +#pragma GCC diagnostic push +#if defined(__clang__) +#pragma GCC diagnostic ignored "-Wdisabled-macro-expansion" +#endif + sa.sa_handler = [](int) { g_handler(); }; +#if !PERFETTO_BUILDFLAG(PERFETTO_OS_QNX) + sa.sa_flags = static_cast(SA_RESETHAND | SA_RESTART); +#else // POSIX-compliant + sa.sa_flags = static_cast(SA_RESETHAND); +#endif +#pragma GCC diagnostic pop + sigaction(SIGINT, &sa, nullptr); + sigaction(SIGTERM, &sa, nullptr); +#else + // Do nothing on NaCL and Fuchsia. 
+ ignore_result(handler); +#endif +} + +} // namespace base +} // namespace perfetto +// gen_amalgamated begin source: src/base/event_fd.cc +// gen_amalgamated begin header: include/perfetto/ext/base/event_fd.h +// gen_amalgamated begin header: include/perfetto/ext/base/scoped_file.h +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INCLUDE_PERFETTO_EXT_BASE_SCOPED_FILE_H_ +#define INCLUDE_PERFETTO_EXT_BASE_SCOPED_FILE_H_ + +// gen_amalgamated expanded: #include "perfetto/base/build_config.h" + +#include + +#if !PERFETTO_BUILDFLAG(PERFETTO_OS_WIN) +#include // For DIR* / opendir(). +#endif + +#include + +// gen_amalgamated expanded: #include "perfetto/base/export.h" +// gen_amalgamated expanded: #include "perfetto/base/logging.h" +// gen_amalgamated expanded: #include "perfetto/base/platform_handle.h" + +namespace perfetto { +namespace base { + +namespace internal { +// Used for the most common cases of ScopedResource where there is only one +// invalid value. +template +struct DefaultValidityChecker { + static bool IsValid(T t) { return t != InvalidValue; } +}; +} // namespace internal + +// RAII classes for auto-releasing fds and dirs. +// if T is a pointer type, InvalidValue must be nullptr. Doing otherwise +// causes weird unexpected behaviors (See https://godbolt.org/z/5nGMW4). 
+template > +class ScopedResource { + public: + using ValidityChecker = Checker; + static constexpr T kInvalid = InvalidValue; + + explicit ScopedResource(T t = InvalidValue) : t_(t) {} + ScopedResource(ScopedResource&& other) noexcept { + t_ = other.t_; + other.t_ = InvalidValue; + } + ScopedResource& operator=(ScopedResource&& other) { + reset(other.t_); + other.t_ = InvalidValue; + return *this; + } + T get() const { return t_; } + T operator*() const { return t_; } + explicit operator bool() const { return Checker::IsValid(t_); } + void reset(T r = InvalidValue) { + if (Checker::IsValid(t_)) { + int res = CloseFunction(t_); + if (CheckClose) + PERFETTO_CHECK(res == 0); + } + t_ = r; + } + T release() { + T t = t_; + t_ = InvalidValue; + return t; + } + ~ScopedResource() { reset(InvalidValue); } + + private: + ScopedResource(const ScopedResource&) = delete; + ScopedResource& operator=(const ScopedResource&) = delete; + T t_; +}; + +// Declared in file_utils.h. Forward declared to avoid #include cycles. +int PERFETTO_EXPORT_COMPONENT CloseFile(int fd); + +// Use this for file resources obtained via open() and similar APIs. +using ScopedFile = ScopedResource; +using ScopedFstream = ScopedResource; + +// Use this for resources that are HANDLE on Windows. See comments in +// platform_handle.h +#if PERFETTO_BUILDFLAG(PERFETTO_OS_WIN) +using ScopedPlatformHandle = ScopedResource; +#else +// On non-windows systems we alias ScopedPlatformHandle to ScopedFile because +// they are really the same. This is to allow assignments between the two in +// Linux-specific code paths that predate ScopedPlatformHandle. +static_assert(std::is_same::value, ""); +using ScopedPlatformHandle = ScopedFile; + +// DIR* does not exist on Windows. 
+using ScopedDir = ScopedResource; +#endif + +} // namespace base +} // namespace perfetto + +#endif // INCLUDE_PERFETTO_EXT_BASE_SCOPED_FILE_H_ +/* + * Copyright (C) 2018 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INCLUDE_PERFETTO_EXT_BASE_EVENT_FD_H_ +#define INCLUDE_PERFETTO_EXT_BASE_EVENT_FD_H_ + +// gen_amalgamated expanded: #include "perfetto/base/build_config.h" +// gen_amalgamated expanded: #include "perfetto/base/platform_handle.h" +// gen_amalgamated expanded: #include "perfetto/ext/base/scoped_file.h" + +namespace perfetto { +namespace base { + +// A waitable event that can be used with poll/select. +// This is really a wrapper around eventfd_create with a pipe-based fallback +// for other platforms where eventfd is not supported. +class EventFd { + public: + EventFd(); + ~EventFd(); + EventFd(EventFd&&) noexcept = default; + EventFd& operator=(EventFd&&) = default; + + // The non-blocking file descriptor that can be polled to wait for the event. + PlatformHandle fd() const { return event_handle_.get(); } + + // Can be called from any thread. + void Notify(); + + // Can be called from any thread. If more Notify() are queued a Clear() call + // can clear all of them (up to 16 per call). + void Clear(); + + private: + // The eventfd, when eventfd is supported, otherwise this is the read end of + // the pipe for fallback mode. 
+ ScopedPlatformHandle event_handle_; + +// QNX is specified because it is a non-Linux UNIX platform but it +// still sets the PERFETTO_OS_LINUX flag to be as compatible as possible +// with the Linux build. +#if !PERFETTO_BUILDFLAG(PERFETTO_OS_LINUX_BUT_NOT_QNX) && \ + !PERFETTO_BUILDFLAG(PERFETTO_OS_ANDROID) && \ + !PERFETTO_BUILDFLAG(PERFETTO_OS_WIN) + // On Mac and other non-Linux UNIX platforms a pipe-based fallback is used. + // The write end of the wakeup pipe. + ScopedFile write_fd_; +#endif +}; + +} // namespace base +} // namespace perfetto + +#endif // INCLUDE_PERFETTO_EXT_BASE_EVENT_FD_H_ +// gen_amalgamated begin header: include/perfetto/ext/base/pipe.h +/* + * Copyright (C) 2018 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef INCLUDE_PERFETTO_EXT_BASE_PIPE_H_ +#define INCLUDE_PERFETTO_EXT_BASE_PIPE_H_ + +// gen_amalgamated expanded: #include "perfetto/base/platform_handle.h" +// gen_amalgamated expanded: #include "perfetto/ext/base/scoped_file.h" + +namespace perfetto { +namespace base { + +class Pipe { + public: + enum Flags { + kBothBlock = 0, +#if !PERFETTO_BUILDFLAG(PERFETTO_OS_WIN) + kBothNonBlock, + kRdNonBlock, + kWrNonBlock, +#endif + }; + + static Pipe Create(Flags = kBothBlock); + + Pipe(); + Pipe(Pipe&&) noexcept; + Pipe& operator=(Pipe&&); + + ScopedPlatformHandle rd; + ScopedPlatformHandle wr; +}; + +} // namespace base +} // namespace perfetto + +#endif // INCLUDE_PERFETTO_EXT_BASE_PIPE_H_ +/* + * Copyright (C) 2018 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +// gen_amalgamated expanded: #include "perfetto/base/build_config.h" + +#include +#include + +#if PERFETTO_BUILDFLAG(PERFETTO_OS_WIN) +#include +#include +#elif PERFETTO_BUILDFLAG(PERFETTO_OS_QNX) +#include +#elif PERFETTO_BUILDFLAG(PERFETTO_OS_LINUX) || \ + PERFETTO_BUILDFLAG(PERFETTO_OS_ANDROID) +#include +#include +#else // Mac, Fuchsia and other non-Linux UNIXes +#include +#endif + +// gen_amalgamated expanded: #include "perfetto/base/logging.h" +// gen_amalgamated expanded: #include "perfetto/ext/base/event_fd.h" +// gen_amalgamated expanded: #include "perfetto/ext/base/pipe.h" +// gen_amalgamated expanded: #include "perfetto/ext/base/utils.h" + +namespace perfetto { +namespace base { + +EventFd::~EventFd() = default; + +#if PERFETTO_BUILDFLAG(PERFETTO_OS_WIN) +EventFd::EventFd() { + event_handle_.reset( + CreateEventA(/*lpEventAttributes=*/nullptr, /*bManualReset=*/true, + /*bInitialState=*/false, /*bInitialState=*/nullptr)); +} + +void EventFd::Notify() { + if (!SetEvent(event_handle_.get())) // 0: fail, !0: success, unlike UNIX. + PERFETTO_DFATAL("EventFd::Notify()"); +} + +void EventFd::Clear() { + if (!ResetEvent(event_handle_.get())) // 0: fail, !0: success, unlike UNIX. 
+ PERFETTO_DFATAL("EventFd::Clear()"); +} + +#elif PERFETTO_BUILDFLAG(PERFETTO_OS_LINUX_BUT_NOT_QNX) || \ + PERFETTO_BUILDFLAG(PERFETTO_OS_ANDROID) + +EventFd::EventFd() { + event_handle_.reset(eventfd(/*initval=*/0, EFD_CLOEXEC | EFD_NONBLOCK)); + PERFETTO_CHECK(event_handle_); +} + +void EventFd::Notify() { + const uint64_t value = 1; + ssize_t ret = write(event_handle_.get(), &value, sizeof(value)); + if (ret <= 0 && errno != EAGAIN) + PERFETTO_DFATAL("EventFd::Notify()"); +} + +void EventFd::Clear() { + uint64_t value; + ssize_t ret = + PERFETTO_EINTR(read(event_handle_.get(), &value, sizeof(value))); + if (ret <= 0 && errno != EAGAIN) + PERFETTO_DFATAL("EventFd::Clear()"); +} + +#else + +EventFd::EventFd() { + // Make the pipe non-blocking so that we never block the waking thread (either + // the main thread or another one) when scheduling a wake-up. + Pipe pipe = Pipe::Create(Pipe::kBothNonBlock); + event_handle_ = ScopedPlatformHandle(std::move(pipe.rd).release()); + write_fd_ = std::move(pipe.wr); +} + +void EventFd::Notify() { + const uint64_t value = 1; + ssize_t ret = write(write_fd_.get(), &value, sizeof(uint8_t)); + if (ret <= 0 && errno != EAGAIN) + PERFETTO_DFATAL("EventFd::Notify()"); +} + +void EventFd::Clear() { + // Drain the byte(s) written to the wake-up pipe. We can potentially read + // more than one byte if several wake-ups have been scheduled. 
+ char buffer[16]; + ssize_t ret = + PERFETTO_EINTR(read(event_handle_.get(), &buffer[0], sizeof(buffer))); + if (ret <= 0 && errno != EAGAIN) + PERFETTO_DFATAL("EventFd::Clear()"); +} +#endif + +} // namespace base +} // namespace perfetto +// gen_amalgamated begin source: src/base/file_utils.cc +// gen_amalgamated begin header: include/perfetto/ext/base/file_utils.h +// gen_amalgamated begin header: include/perfetto/base/status.h +/* + * Copyright (C) 2019 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INCLUDE_PERFETTO_BASE_STATUS_H_ +#define INCLUDE_PERFETTO_BASE_STATUS_H_ + +#include +#include +#include +#include + +// gen_amalgamated expanded: #include "perfetto/base/compiler.h" +// gen_amalgamated expanded: #include "perfetto/base/export.h" +// gen_amalgamated expanded: #include "perfetto/base/logging.h" + +namespace perfetto { +namespace base { + +// Represents either the success or the failure message of a function. +// This can used as the return type of functions which would usually return an +// bool for success or int for errno but also wants to add some string context +// (ususally for logging). +// +// Similar to absl::Status, an optional "payload" can also be included with more +// context about the error. This allows passing additional metadata about the +// error (e.g. location of errors, potential mitigations etc). 
+class PERFETTO_EXPORT_COMPONENT Status { + public: + Status() : ok_(true) {} + explicit Status(std::string msg) : ok_(false), message_(std::move(msg)) { + PERFETTO_CHECK(!message_.empty()); + } + + // Copy operations. + Status(const Status&) = default; + Status& operator=(const Status&) = default; + + // Move operations. The moved-from state is valid but unspecified. + Status(Status&&) noexcept = default; + Status& operator=(Status&&) = default; + + bool ok() const { return ok_; } + + // When ok() is false this returns the error message. Returns the empty string + // otherwise. + const std::string& message() const { return message_; } + const char* c_message() const { return message_.c_str(); } + + ////////////////////////////////////////////////////////////////////////////// + // Payload Management APIs + ////////////////////////////////////////////////////////////////////////////// + + // Payloads can be attached to error statuses to provide additional context. + // + // Payloads are (key, value) pairs, where the key is a string acting as a + // unique "type URL" and the value is an opaque string. The "type URL" should + // be unique, follow the format of a URL and, ideally, documentation on how to + // interpret its associated data should be available. + // + // To attach a payload to a status object, call `Status::SetPayload()`. + // Similarly, to extract the payload from a status, call + // `Status::GetPayload()`. + // + // Note: the payload APIs are only meaningful to call when the status is an + // error. Otherwise, all methods are noops. + + // Gets the payload for the given |type_url| if one exists. + // + // Will always return std::nullopt if |ok()|. + std::optional GetPayload(std::string_view type_url) const; + + // Sets the payload for the given key. The key should + // + // Will always do nothing if |ok()|. 
+ void SetPayload(std::string_view type_url, std::string value); + + // Erases the payload for the given string and returns true if the payload + // existed and was erased. + // + // Will always do nothing if |ok()|. + bool ErasePayload(std::string_view type_url); + + private: + struct Payload { + std::string type_url; + std::string payload; + }; + + bool ok_ = false; + std::string message_; + std::vector payloads_; +}; + +// Returns a status object which represents the Ok status. +inline Status OkStatus() { + return Status(); +} + +Status ErrStatus(const char* format, ...) PERFETTO_PRINTF_FORMAT(1, 2); + +} // namespace base +} // namespace perfetto + +#endif // INCLUDE_PERFETTO_BASE_STATUS_H_ +/* + * Copyright (C) 2018 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INCLUDE_PERFETTO_EXT_BASE_FILE_UTILS_H_ +#define INCLUDE_PERFETTO_EXT_BASE_FILE_UTILS_H_ + +#include // For mode_t & O_RDONLY/RDWR. Exists also on Windows. 
+#include + +#include +#include +#include + +// gen_amalgamated expanded: #include "perfetto/base/build_config.h" +// gen_amalgamated expanded: #include "perfetto/base/export.h" +// gen_amalgamated expanded: #include "perfetto/base/status.h" +// gen_amalgamated expanded: #include "perfetto/ext/base/scoped_file.h" +// gen_amalgamated expanded: #include "perfetto/ext/base/utils.h" + +namespace perfetto { +namespace base { + +#if PERFETTO_BUILDFLAG(PERFETTO_OS_WIN) +using FileOpenMode = int; +inline constexpr char kDevNull[] = "NUL"; +#else +using FileOpenMode = mode_t; +inline constexpr char kDevNull[] = "/dev/null"; +#endif + +constexpr FileOpenMode kFileModeInvalid = static_cast(-1); + +bool ReadPlatformHandle(PlatformHandle, std::string* out); +bool ReadFileDescriptor(int fd, std::string* out); +bool ReadFileStream(FILE* f, std::string* out); +bool ReadFile(const std::string& path, std::string* out); + +// A wrapper around read(2). It deals with Linux vs Windows includes. It also +// deals with handling EINTR. Has the same semantics of UNIX's read(2). +ssize_t Read(int fd, void* dst, size_t dst_size); + +// Call write until all data is written or an error is detected. +// +// man 2 write: +// If a write() is interrupted by a signal handler before any bytes are +// written, then the call fails with the error EINTR; if it is +// interrupted after at least one byte has been written, the call +// succeeds, and returns the number of bytes written. +ssize_t WriteAll(int fd, const void* buf, size_t count); + +ssize_t WriteAllHandle(PlatformHandle, const void* buf, size_t count); + +ScopedFile OpenFile(const std::string& path, + int flags, + FileOpenMode = kFileModeInvalid); +ScopedFstream OpenFstream(const char* path, const char* mode); + +// This is an alias for close(). It's to avoid leaking Windows.h in headers. +// Exported because ScopedFile is used in the /include/ext API by Chromium +// component builds. 
+int PERFETTO_EXPORT_COMPONENT CloseFile(int fd); + +bool FlushFile(int fd); + +// Returns true if mkdir succeeds, false if it fails (see errno in that case). +bool Mkdir(const std::string& path); + +// Calls rmdir() on UNIX, _rmdir() on Windows. +bool Rmdir(const std::string& path); + +// Wrapper around access(path, F_OK). +bool FileExists(const std::string& path); + +// Gets the extension for a filename. If the file has two extensions, returns +// only the last one (foo.pb.gz => .gz). Returns empty string if there is no +// extension. +std::string GetFileExtension(const std::string& filename); + +// Puts the path to all files under |dir_path| in |output|, recursively walking +// subdirectories. File paths are relative to |dir_path|. Only files are +// included, not directories. Path separator is always '/', even on windows (not +// '\'). +base::Status ListFilesRecursive(const std::string& dir_path, + std::vector& output); + +// Sets |path|'s owner group to |group_name| and permission mode bits to +// |mode_bits|. +base::Status SetFilePermissions(const std::string& path, + const std::string& group_name, + const std::string& mode_bits); + +// Returns the size of the file located at |path|, or nullopt in case of error. +std::optional GetFileSize(const std::string& path); + +// Returns the size of the open file |fd|, or nullopt in case of error. +std::optional GetFileSize(PlatformHandle fd); + +} // namespace base +} // namespace perfetto + +#endif // INCLUDE_PERFETTO_EXT_BASE_FILE_UTILS_H_ +/* + * Copyright (C) 2018 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// gen_amalgamated expanded: #include "perfetto/ext/base/file_utils.h" + +#include +#include + +#include +#include +#include +#include +#include + +// gen_amalgamated expanded: #include "perfetto/base/build_config.h" +// gen_amalgamated expanded: #include "perfetto/base/compiler.h" +// gen_amalgamated expanded: #include "perfetto/base/logging.h" +// gen_amalgamated expanded: #include "perfetto/base/platform_handle.h" +// gen_amalgamated expanded: #include "perfetto/base/status.h" +// gen_amalgamated expanded: #include "perfetto/ext/base/platform.h" +// gen_amalgamated expanded: #include "perfetto/ext/base/scoped_file.h" +// gen_amalgamated expanded: #include "perfetto/ext/base/string_utils.h" +// gen_amalgamated expanded: #include "perfetto/ext/base/utils.h" + +#if PERFETTO_BUILDFLAG(PERFETTO_OS_WIN) +#include +#include +#include +#include +#else +#include +#include +#endif + +#if PERFETTO_BUILDFLAG(PERFETTO_OS_LINUX) || \ + PERFETTO_BUILDFLAG(PERFETTO_OS_ANDROID) || \ + PERFETTO_BUILDFLAG(PERFETTO_OS_APPLE) +#define PERFETTO_SET_FILE_PERMISSIONS +#include +#include +#include +#include +#include +#endif + +namespace perfetto { +namespace base { +namespace { +constexpr size_t kBufSize = 2048; + +#if PERFETTO_BUILDFLAG(PERFETTO_OS_WIN) +// Wrap FindClose to: (1) make the return unix-style; (2) deal with stdcall. +int CloseFindHandle(HANDLE h) { + return FindClose(h) ? 
0 : -1; +} + +std::optional ToUtf16(const std::string str) { + int len = MultiByteToWideChar(CP_UTF8, 0, str.data(), + static_cast(str.size()), nullptr, 0); + if (len < 0) { + return std::nullopt; + } + std::vector tmp; + tmp.resize(static_cast::size_type>(len)); + len = + MultiByteToWideChar(CP_UTF8, 0, str.data(), static_cast(str.size()), + tmp.data(), static_cast(tmp.size())); + if (len < 0) { + return std::nullopt; + } + PERFETTO_CHECK(static_cast::size_type>(len) == + tmp.size()); + return std::wstring(tmp.data(), tmp.size()); +} + +#endif + +} // namespace + +ssize_t Read(int fd, void* dst, size_t dst_size) { + ssize_t ret; + platform::BeforeMaybeBlockingSyscall(); +#if PERFETTO_BUILDFLAG(PERFETTO_OS_WIN) + ret = _read(fd, dst, static_cast(dst_size)); +#else + ret = PERFETTO_EINTR(read(fd, dst, dst_size)); +#endif + platform::AfterMaybeBlockingSyscall(); + return ret; +} + +bool ReadFileDescriptor(int fd, std::string* out) { + // Do not override existing data in string. + size_t i = out->size(); + + struct stat buf {}; + if (fstat(fd, &buf) != -1) { + if (buf.st_size > 0) + out->resize(i + static_cast(buf.st_size)); + } + + ssize_t bytes_read; + for (;;) { + if (out->size() < i + kBufSize) + out->resize(out->size() + kBufSize); + + bytes_read = Read(fd, &((*out)[i]), kBufSize); + if (bytes_read > 0) { + i += static_cast(bytes_read); + } else { + out->resize(i); + return bytes_read == 0; + } + } +} + +bool ReadPlatformHandle(PlatformHandle h, std::string* out) { +#if PERFETTO_BUILDFLAG(PERFETTO_OS_WIN) + // Do not override existing data in string. + size_t i = out->size(); + + for (;;) { + if (out->size() < i + kBufSize) + out->resize(out->size() + kBufSize); + DWORD bytes_read = 0; + auto res = ::ReadFile(h, &((*out)[i]), kBufSize, &bytes_read, nullptr); + if (res && bytes_read > 0) { + i += static_cast(bytes_read); + } else { + out->resize(i); + const bool is_eof = res && bytes_read == 0; + auto err = res ? 
0 : GetLastError(); + // The "Broken pipe" error on Windows is slighly different than Unix: + // On Unix: a "broken pipe" error can happen only on the writer side. On + // the reader there is no broken pipe, just a EOF. + // On windows: the reader also sees a broken pipe error. + // Here we normalize on the Unix behavior, treating broken pipe as EOF. + return is_eof || err == ERROR_BROKEN_PIPE; + } + } +#else + return ReadFileDescriptor(h, out); +#endif +} + +bool ReadFileStream(FILE* f, std::string* out) { + return ReadFileDescriptor(fileno(f), out); +} + +bool ReadFile(const std::string& path, std::string* out) { + base::ScopedFile fd = base::OpenFile(path, O_RDONLY); + if (!fd) + return false; + + return ReadFileDescriptor(*fd, out); +} + +ssize_t WriteAll(int fd, const void* buf, size_t count) { + size_t written = 0; + while (written < count) { + // write() on windows takes an unsigned int size. + uint32_t bytes_left = static_cast( + std::min(count - written, static_cast(UINT32_MAX))); + platform::BeforeMaybeBlockingSyscall(); + ssize_t wr = PERFETTO_EINTR( + write(fd, static_cast(buf) + written, bytes_left)); + platform::AfterMaybeBlockingSyscall(); + if (wr == 0) + break; + if (wr < 0) + return wr; + written += static_cast(wr); + } + return static_cast(written); +} + +ssize_t WriteAllHandle(PlatformHandle h, const void* buf, size_t count) { +#if PERFETTO_BUILDFLAG(PERFETTO_OS_WIN) + DWORD wsize = 0; + if (::WriteFile(h, buf, static_cast(count), &wsize, nullptr)) { + return wsize; + } else { + return -1; + } +#else + return WriteAll(h, buf, count); +#endif +} + +bool FlushFile(int fd) { + PERFETTO_DCHECK(fd != 0); +#if PERFETTO_BUILDFLAG(PERFETTO_OS_LINUX) || \ + PERFETTO_BUILDFLAG(PERFETTO_OS_ANDROID) + return !PERFETTO_EINTR(fdatasync(fd)); +#elif PERFETTO_BUILDFLAG(PERFETTO_OS_WIN) + return !PERFETTO_EINTR(_commit(fd)); +#else + return !PERFETTO_EINTR(fsync(fd)); +#endif +} + +bool Mkdir(const std::string& path) { +#if PERFETTO_BUILDFLAG(PERFETTO_OS_WIN) + 
return _mkdir(path.c_str()) == 0; +#else + return mkdir(path.c_str(), 0755) == 0; +#endif +} + +bool Rmdir(const std::string& path) { +#if PERFETTO_BUILDFLAG(PERFETTO_OS_WIN) + return _rmdir(path.c_str()) == 0; +#else + return rmdir(path.c_str()) == 0; +#endif +} + +int CloseFile(int fd) { + return close(fd); +} + +ScopedFile OpenFile(const std::string& path, int flags, FileOpenMode mode) { + // If a new file might be created, ensure that the permissions for the new + // file are explicitly specified. + PERFETTO_CHECK((flags & O_CREAT) == 0 || mode != kFileModeInvalid); +#if PERFETTO_BUILDFLAG(PERFETTO_OS_WIN) + // Always use O_BINARY on Windows, to avoid silly EOL translations. + ScopedFile fd(_open(path.c_str(), flags | O_BINARY, mode)); +#else + // Always open a ScopedFile with O_CLOEXEC so we can safely fork and exec. + ScopedFile fd(open(path.c_str(), flags | O_CLOEXEC, mode)); +#endif + return fd; +} + +ScopedFstream OpenFstream(const char* path, const char* mode) { + ScopedFstream file; +// On Windows fopen interprets filename using the ANSI or OEM codepage but +// sqlite3_value_text returns a UTF-8 string. To make sure we interpret the +// filename correctly we use _wfopen and a UTF-16 string on windows. +#if PERFETTO_BUILDFLAG(PERFETTO_OS_WIN) + auto w_path = ToUtf16(path); + auto w_mode = ToUtf16(mode); + if (w_path && w_mode) { + file.reset(_wfopen(w_path->c_str(), w_mode->c_str())); + } +#else + file.reset(fopen(path, mode)); +#endif + return file; +} + +bool FileExists(const std::string& path) { +#if PERFETTO_BUILDFLAG(PERFETTO_OS_WIN) + return _access(path.c_str(), 0) == 0; +#else + return access(path.c_str(), F_OK) == 0; +#endif +} + +// Declared in base/platform_handle.h. +int ClosePlatformHandle(PlatformHandle handle) { +#if PERFETTO_BUILDFLAG(PERFETTO_OS_WIN) + // Make the return value UNIX-style. + return CloseHandle(handle) ? 
0 : -1; +#else + return close(handle); +#endif +} + +base::Status ListFilesRecursive(const std::string& dir_path, + std::vector& output) { + std::string root_dir_path = dir_path; + if (root_dir_path.back() == '\\') { + root_dir_path.back() = '/'; + } else if (root_dir_path.back() != '/') { + root_dir_path.push_back('/'); + } + + // dir_queue contains full paths to the directories. The paths include the + // root_dir_path at the beginning and the trailing slash at the end. + std::deque dir_queue; + dir_queue.push_back(root_dir_path); + + while (!dir_queue.empty()) { + const std::string cur_dir = std::move(dir_queue.front()); + dir_queue.pop_front(); +#if PERFETTO_BUILDFLAG(PERFETTO_OS_NACL) + return base::ErrStatus("ListFilesRecursive not supported yet"); +#elif PERFETTO_BUILDFLAG(PERFETTO_OS_WIN) + std::string glob_path = cur_dir + "*"; + // + 1 because we also have to count the NULL terminator. + if (glob_path.length() + 1 > MAX_PATH) + return base::ErrStatus("Directory path %s is too long", dir_path.c_str()); + WIN32_FIND_DATAA ffd; + + base::ScopedResource + hFind(FindFirstFileA(glob_path.c_str(), &ffd)); + if (!hFind) { + // For empty directories, there should be at least one entry '.'. + // If FindFirstFileA returns INVALID_HANDLE_VALUE, this means directory + // couldn't be accessed. 
+ return base::ErrStatus("Failed to open directory %s", cur_dir.c_str()); + } + do { + if (strcmp(ffd.cFileName, ".") == 0 || strcmp(ffd.cFileName, "..") == 0) + continue; + if (ffd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) { + std::string subdir_path = cur_dir + ffd.cFileName + '/'; + dir_queue.push_back(subdir_path); + } else { + const std::string full_path = cur_dir + ffd.cFileName; + PERFETTO_CHECK(full_path.length() > root_dir_path.length()); + output.push_back(full_path.substr(root_dir_path.length())); + } + } while (FindNextFileA(*hFind, &ffd)); +#else + ScopedDir dir = ScopedDir(opendir(cur_dir.c_str())); + if (!dir) { + return base::ErrStatus("Failed to open directory %s", cur_dir.c_str()); + } + for (auto* dirent = readdir(dir.get()); dirent != nullptr; + dirent = readdir(dir.get())) { + if (strcmp(dirent->d_name, ".") == 0 || + strcmp(dirent->d_name, "..") == 0) { + continue; + } +#if PERFETTO_BUILDFLAG(PERFETTO_OS_QNX) + struct stat* dirstat; + const std::string full_path = cur_dir + dirent->d_name; + PERFETTO_CHECK(stat(full_path.c_str(), dirstat) == 0); + if (S_ISDIR(dirstat->st_mode)) { + dir_queue.push_back(full_path + '/'); + } else if (S_ISREG(dirstat->st_mode)) { + PERFETTO_CHECK(full_path.length() > root_dir_path.length()); + output.push_back(full_path.substr(root_dir_path.length())); + } +#else + if (dirent->d_type == DT_DIR) { + dir_queue.push_back(cur_dir + dirent->d_name + '/'); + } else if (dirent->d_type == DT_REG) { + const std::string full_path = cur_dir + dirent->d_name; + PERFETTO_CHECK(full_path.length() > root_dir_path.length()); + output.push_back(full_path.substr(root_dir_path.length())); + } +#endif + } +#endif + } + return base::OkStatus(); +} + +std::string GetFileExtension(const std::string& filename) { + auto ext_idx = filename.rfind('.'); + if (ext_idx == std::string::npos) + return std::string(); + return filename.substr(ext_idx); +} + +base::Status SetFilePermissions(const std::string& file_path, + const std::string& 
group_name_or_id, + const std::string& mode_bits) { +#ifdef PERFETTO_SET_FILE_PERMISSIONS + PERFETTO_CHECK(!file_path.empty()); + PERFETTO_CHECK(!group_name_or_id.empty()); + + // Default |group_id| to -1 for not changing the group ownership. + gid_t group_id = static_cast(-1); + auto maybe_group_id = base::StringToUInt32(group_name_or_id); + if (maybe_group_id) { // A numerical group ID. + group_id = *maybe_group_id; + } else { // A group name. + struct group* file_group = nullptr; + // Query the group ID of |group|. + do { + file_group = getgrnam(group_name_or_id.c_str()); + } while (file_group == nullptr && errno == EINTR); + if (file_group == nullptr) { + return base::ErrStatus("Failed to get group information of %s ", + group_name_or_id.c_str()); + } + group_id = file_group->gr_gid; + } + + if (PERFETTO_EINTR(chown(file_path.c_str(), geteuid(), group_id))) { + return base::ErrStatus("Failed to chown %s ", file_path.c_str()); + } + + // |mode| accepts values like "0660" as "rw-rw----" mode bits. + auto mode_value = base::StringToInt32(mode_bits, 8); + if (!(mode_bits.size() == 4 && mode_value.has_value())) { + return base::ErrStatus( + "The chmod mode bits must be a 4-digit octal number, e.g. 0660"); + } + if (PERFETTO_EINTR( + chmod(file_path.c_str(), static_cast(mode_value.value())))) { + return base::ErrStatus("Failed to chmod %s", file_path.c_str()); + } + return base::OkStatus(); +#else + base::ignore_result(file_path); + base::ignore_result(group_name_or_id); + base::ignore_result(mode_bits); + return base::ErrStatus( + "Setting file permissions is not supported on this platform"); +#endif +} + +std::optional GetFileSize(const std::string& file_path) { +#if PERFETTO_BUILDFLAG(PERFETTO_OS_WIN) + // This does not use base::OpenFile to avoid getting an exclusive lock. 
+ base::ScopedPlatformHandle fd( + CreateFileA(file_path.c_str(), GENERIC_READ, FILE_SHARE_READ, nullptr, + OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, nullptr)); +#else + base::ScopedFile fd(base::OpenFile(file_path, O_RDONLY | O_CLOEXEC)); +#endif + if (!fd) { + return std::nullopt; + } + return GetFileSize(*fd); +} + +std::optional GetFileSize(PlatformHandle fd) { +#if PERFETTO_BUILDFLAG(PERFETTO_OS_WIN) + LARGE_INTEGER file_size; + file_size.QuadPart = 0; + if (!GetFileSizeEx(fd, &file_size)) { + return std::nullopt; + } + static_assert(sizeof(decltype(file_size.QuadPart)) <= sizeof(uint64_t)); + return static_cast(file_size.QuadPart); +#else + struct stat buf {}; + if (fstat(fd, &buf) == -1) { + return std::nullopt; + } + static_assert(sizeof(decltype(buf.st_size)) <= sizeof(uint64_t)); + return static_cast(buf.st_size); +#endif +} + +} // namespace base +} // namespace perfetto +// gen_amalgamated begin source: src/base/getopt_compat.cc +// gen_amalgamated begin header: include/perfetto/ext/base/getopt_compat.h +/* + * Copyright (C) 2021 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INCLUDE_PERFETTO_EXT_BASE_GETOPT_COMPAT_H_ +#define INCLUDE_PERFETTO_EXT_BASE_GETOPT_COMPAT_H_ + +#include // For std::nullptr_t + +// No translation units other than base/getopt.h and getopt_compat_unittest.cc +// should directly include this file. Use base/getopt.h instead. 
+ +namespace perfetto { +namespace base { +namespace getopt_compat { + +// A tiny getopt() replacement for Windows, which doesn't have . +// This implementation is based on the subset of features that we use in the +// Perfetto codebase. It doesn't even try to deal with the full surface of GNU's +// getopt(). +// Limitations: +// - getopt_long_only() is not supported. +// - optional_argument is not supported. That is extremely subtle and caused us +// problems in the past with GNU's getopt. +// - It does not reorder non-option arguments. It behaves like MacOS getopt, or +// GNU's when POSIXLY_CORRECT=1. +// - Doesn't expose optopt or opterr. +// - option.flag and longindex are not supported and must be nullptr. + +enum { + no_argument = 0, + required_argument = 1, +}; + +struct option { + const char* name; + int has_arg; + std::nullptr_t flag; // Only nullptr is supported. + int val; +}; + +extern char* optarg; +extern int optind; +extern int optopt; +extern int opterr; + +int getopt_long(int argc, + char** argv, + const char* shortopts, + const option* longopts, + std::nullptr_t /*longindex is not supported*/); + +int getopt(int argc, char** argv, const char* shortopts); + +} // namespace getopt_compat +} // namespace base +} // namespace perfetto + +#endif // INCLUDE_PERFETTO_EXT_BASE_GETOPT_COMPAT_H_ +/* + * Copyright (C) 2021 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +// gen_amalgamated expanded: #include "perfetto/ext/base/getopt_compat.h" + +#include +#include +#include + +#include + +// gen_amalgamated expanded: #include "perfetto/base/logging.h" + +namespace perfetto { +namespace base { +namespace getopt_compat { + +char* optarg = nullptr; +int optind = 0; +int optopt = 0; +int opterr = 1; + +namespace { + +char* nextchar = nullptr; + +const option* LookupLongOpt(const std::vector