Skip to content

Commit a70929d

Browse files
committed
dev container and testing notes
1 parent 92593e7 commit a70929d

File tree

7 files changed

+754
-0
lines changed

7 files changed

+754
-0
lines changed

.devcontainer/Dockerfile

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
# syntax=docker/dockerfile:1
FROM ubuntu:24.04

# Avoid prompts from apt during the build only. ARG (unlike ENV) does not
# leak into the runtime environment of the final image.
ARG DEBIAN_FRONTEND=noninteractive

# Copy in a zscaler.crt if one exists — the [t] glob makes this COPY a no-op
# instead of a build failure when the file is absent.
# This allows the container to access the internet on corporate laptops.
COPY zscaler.cr[t] /usr/local/share/ca-certificates/

# This tells various tools to use the system CA certificates, so the optional
# corporate certificate above is honoured once update-ca-certificates runs.
ENV REQUESTS_CA_BUNDLE=/etc/ssl/certs/ca-certificates.crt \
    SSL_CERT_FILE=/etc/ssl/certs/ca-certificates.crt \
    NODE_OPTIONS=--use-openssl-ca

# The ROCm key setup below pipes wget into gpg; without pipefail the pipe's
# exit status would silently mask a failed download.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# Install the build toolchain, NUMA/topology tooling and the ROCm stack in a
# single layer, and clean the apt lists in that same layer so the cache never
# bloats the image. Package list is sorted for diffability.
# NOTE: gnupg is required by `gpg --dearmor` below and sudo by the sudoers
# entry further down — neither is present in the ubuntu:24.04 base image.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        ccache \
        cmake \
        curl \
        gdb \
        gh \
        git \
        gnupg \
        hwloc-nox \
        libcurl4-openssl-dev \
        libhwloc-dev \
        libnuma-dev \
        ninja-build \
        numactl \
        pkg-config \
        python3 \
        python3-pip \
        python3-venv \
        sudo \
        valgrind \
        wget && \
    update-ca-certificates && \
    mkdir -p --mode=0755 /etc/apt/keyrings && \
    wget https://repo.radeon.com/rocm/rocm.gpg.key -O - | \
        gpg --dearmor > /etc/apt/keyrings/rocm.gpg && \
    echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/6.4.2 noble main" \
        > /etc/apt/sources.list.d/rocm.list && \
    # Pin the ROCm repo above the Ubuntu archive so its packages win.
    printf 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600\n' \
        > /etc/apt/preferences.d/rocm-pin-600 && \
    apt-get update && \
    apt-get install -y rocm && \
    apt-get autoremove -y && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

# Install Python dependencies for gguf conversion tools.
# --break-system-packages is required on Ubuntu 24.04 (PEP 668 managed env);
# --no-cache-dir keeps the pip download cache out of the layer.
RUN python3 -m pip install --no-cache-dir --break-system-packages \
    gguf \
    numpy \
    protobuf \
    sentencepiece \
    torch \
    transformers

# Set up ccache for faster compilation — the masquerade symlink directory
# must come first on PATH to intercept compiler invocations.
ENV PATH="/usr/lib/ccache:${PATH}" \
    CCACHE_DIR="/tmp/ccache"

# Create a non-root user with passwordless sudo (the sudo package itself is
# installed in the apt layer above).
RUN useradd -m -s /bin/bash developer && \
    usermod -aG sudo developer && \
    echo "developer ALL=(ALL) NOPASSWD:ALL" >> /etc/sudoers

# Pre-create the ccache directory owned by the dev user — created before the
# USER switch it would otherwise be root-owned and unwritable at runtime.
RUN mkdir -p /tmp/ccache && chown developer:developer /tmp/ccache

# Set working directory; chown it so the non-root user can build in it even
# when no workspace mount is present.
WORKDIR /workspace
RUN chown developer:developer /workspace

# Switch to non-root user
USER developer

# Set up shell environment (interactive-use conveniences only)
RUN echo 'export PS1="\[\033[01;32m\]\u@\h\[\033[00m\]:\[\033[01;34m\]\w\[\033[00m\]\$ "' >> ~/.bashrc && \
    echo 'alias ll="ls -alF"' >> ~/.bashrc && \
    echo 'alias la="ls -A"' >> ~/.bashrc && \
    echo 'alias l="ls -CF"' >> ~/.bashrc

# Expose common ports (documentation only — llama-server defaults)
EXPOSE 8080 8081

CMD ["/bin/bash"]

.devcontainer/README.md

Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
# llama.cpp Development Container
2+
3+
This dev container provides a complete Ubuntu 24.04 environment for building and testing llama.cpp with NUMA support.
4+
5+
## Features
6+
7+
- **Ubuntu 24.04 LTS** base image
8+
- **Complete build toolchain**: gcc, cmake, ninja, ccache
9+
- **NUMA support**: libnuma-dev, numactl, hwloc for CPU topology detection
10+
- **Python environment**: with all necessary packages for GGUF conversion tools
11+
- **VS Code integration**: with C/C++, CMake, and Python extensions
12+
- **Development tools**: gdb, valgrind for debugging
13+
14+
## Quick Start
15+
16+
1. **Open in VS Code**: Make sure you have the "Dev Containers" extension installed, then:
17+
- Open the llama.cpp folder in VS Code
18+
- Press `Ctrl+Shift+P` (or `Cmd+Shift+P` on Mac)
19+
- Type "Dev Containers: Reopen in Container"
20+
- Select it and wait for the container to build and start
21+
22+
2. **Build the project**:
23+
```bash
24+
cmake -B build -DCMAKE_BUILD_TYPE=Release
25+
cmake --build build --parallel
26+
```
27+
28+
3. **Test NUMA functionality**:
29+
```bash
30+
# Check NUMA topology
31+
numactl --hardware
32+
33+
# Test CPU topology detection
34+
./build/bin/llama-server --cpu-topology
35+
36+
# Run with specific NUMA settings
37+
numactl --cpunodebind=0 --membind=0 ./build/bin/llama-server --model path/to/model.gguf
38+
```
39+
40+
## Available Tools
41+
42+
### System Tools
43+
- `numactl`: NUMA policy control
44+
- `hwloc-info`: Hardware locality information
45+
- `lscpu`: CPU information
46+
- `ccache`: Compiler cache for faster rebuilds
47+
48+
### Build Configurations
49+
50+
#### Debug Build (default post-create)
51+
```bash
52+
cmake -B build -DCMAKE_BUILD_TYPE=Debug
53+
cmake --build build --parallel
54+
```
55+
56+
#### Release Build (optimized)
57+
```bash
58+
cmake -B build -DCMAKE_BUILD_TYPE=Release
59+
cmake --build build --parallel
60+
```
61+
62+
#### With Additional Options
63+
```bash
64+
# Enable OpenBLAS
65+
cmake -B build -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
66+
67+
# Static build
68+
cmake -B build -DBUILD_SHARED_LIBS=OFF
69+
70+
# Disable CURL if not needed
71+
cmake -B build -DLLAMA_CURL=OFF
72+
```
73+
74+
## Testing NUMA Improvements
75+
76+
The container includes tools to test the NUMA improvements:
77+
78+
### CPU Topology Detection
79+
```bash
80+
# View detailed CPU information
81+
./build/bin/llama-server --cpu-topology
82+
83+
# Check current NUMA configuration
84+
numactl --show
85+
86+
# Display NUMA hardware topology
87+
numactl --hardware
88+
```
89+
90+
### Performance Testing
91+
```bash
92+
# Test with default settings (hyperthreading enabled)
93+
./build/bin/llama-bench -m model.gguf
94+
95+
# Test without hyperthreading
96+
./build/bin/llama-bench -m model.gguf --no-hyperthreading
97+
98+
# Test with specific thread count
99+
./build/bin/llama-bench -m model.gguf --threads 8
100+
101+
# Test with NUMA binding
102+
numactl --cpunodebind=0 --membind=0 ./build/bin/llama-bench -m model.gguf
103+
```
104+
105+
### Environment Variables
106+
```bash
107+
# Disable hyperthreading via environment
108+
LLAMA_NO_HYPERTHREADING=1 ./build/bin/llama-server --model model.gguf
109+
110+
# Enable efficiency cores
111+
LLAMA_USE_EFFICIENCY_CORES=1 ./build/bin/llama-server --model model.gguf
112+
```
113+
114+
## Development Workflow
115+
116+
1. **Code changes**: Edit files in VS Code with full IntelliSense support
117+
2. **Build**: Use `Ctrl+Shift+P` → "CMake: Build" or terminal commands
118+
3. **Debug**: Set breakpoints and use the integrated debugger
119+
4. **Test**: Run executables directly or through the testing framework
120+
121+
## Troubleshooting
122+
123+
### Container Build Issues
124+
- Ensure Docker Desktop is running
125+
- Try rebuilding: `Ctrl+Shift+P` → "Dev Containers: Rebuild Container"
126+
127+
### NUMA Issues
128+
- Check if running on a NUMA system: `numactl --hardware`
129+
- Verify CPU topology detection: `lscpu` and `hwloc-info`
130+
- Test CPU affinity: `taskset -c 0-3 ./your-program`
131+
132+
### Build Issues
133+
- Clear build cache: `rm -rf build && cmake -B build`
134+
- Check ccache stats: `ccache -s`
135+
- Use verbose build: `cmake --build build --verbose`

.devcontainer/devcontainer.json

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
{
    "name": "llama.cpp Development",
    // "build.dockerfile" replaces the deprecated top-level "dockerFile" key.
    "build": {
        "dockerfile": "Dockerfile"
    },
    "customizations": {
        "vscode": {
            "extensions": [
                "ms-vscode.cpptools-extension-pack",
                "ms-vscode.cmake-tools",
                "ms-python.python",
                "ms-python.black-formatter",
                "github.copilot",
                "github.copilot-chat"
            ],
            "settings": {
                "cmake.configureOnOpen": true,
                "cmake.buildDirectory": "${workspaceFolder}/build",
                "C_Cpp.default.configurationProvider": "ms-vscode.cmake-tools",
                "C_Cpp.default.cStandard": "c11",
                // llama.cpp builds as C++17; c++14 made IntelliSense flag
                // valid project code. TODO(review): confirm against the
                // project's CMAKE_CXX_STANDARD.
                "C_Cpp.default.cppStandard": "c++17"
            }
        }
    },
    "mounts": [
        // Docker socket passthrough so docker CLI works inside the container.
        "source=/var/run/docker.sock,target=/var/run/docker.sock,type=bind"
    ],
    "postCreateCommand": "cmake -B build -DCMAKE_BUILD_TYPE=Debug",
    // Forward both ports the Dockerfile EXPOSEs (8081 was missing).
    "forwardPorts": [8080, 8081],
    "runArgs": [
        // --privileged already grants every capability, making the previous
        // extra --cap-add=SYS_ADMIN redundant; needed for NUMA binding and
        // the bind-mounted docker socket.
        "--privileged"
    ],
    "features": {
        "ghcr.io/devcontainers/features/git:1": {},
        "ghcr.io/devcontainers/features/github-cli:1": {}
    }
}

.devcontainer/launch.json

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
{
    "version": "0.2.0",
    "configurations": [
        {
            // Attach gdb to llama-server with topology diagnostics enabled.
            // Replace the model path before first use.
            "name": "Debug llama-server",
            "type": "cppdbg",
            "request": "launch",
            "program": "${workspaceFolder}/build/bin/llama-server",
            "args": [
                "--model", "/path/to/your/model.gguf",
                "--host", "0.0.0.0",
                "--port", "8080",
                "--cpu-topology"
            ],
            "stopAtEntry": false,
            "cwd": "${workspaceFolder}",
            "environment": [],
            "externalConsole": false,
            "MIMode": "gdb",
            "setupCommands": [
                {
                    "description": "Enable pretty-printing for gdb",
                    "text": "-enable-pretty-printing",
                    "ignoreFailures": true
                },
                {
                    "description": "Set Disassembly Flavor to Intel",
                    "text": "set disassembly-flavor intel",
                    "ignoreFailures": true
                }
            ],
            // Defined in the accompanying tasks.json.
            "preLaunchTask": "cmake-build",
            "miDebuggerPath": "/usr/bin/gdb"
        },
        {
            // Debug a one-shot CLI generation with hyperthreading disabled.
            "name": "Debug llama-cli",
            "type": "cppdbg",
            "request": "launch",
            "program": "${workspaceFolder}/build/bin/llama-cli",
            "args": [
                "--model", "/path/to/your/model.gguf",
                "--prompt", "Hello, world!",
                "--no-hyperthreading"
            ],
            "stopAtEntry": false,
            "cwd": "${workspaceFolder}",
            "environment": [],
            "externalConsole": false,
            "MIMode": "gdb",
            "setupCommands": [
                {
                    "description": "Enable pretty-printing for gdb",
                    "text": "-enable-pretty-printing",
                    "ignoreFailures": true
                }
            ],
            "preLaunchTask": "cmake-build",
            "miDebuggerPath": "/usr/bin/gdb"
        },
        {
            // Run only the CPU-topology report (no model needed).
            "name": "Test CPU Topology",
            "type": "cppdbg",
            "request": "launch",
            "program": "${workspaceFolder}/build/bin/llama-server",
            "args": [
                "--cpu-topology"
            ],
            "stopAtEntry": false,
            "cwd": "${workspaceFolder}",
            "environment": [],
            "externalConsole": false,
            "MIMode": "gdb",
            // Match the sibling configurations, which both enable
            // pretty-printing (this entry previously omitted setupCommands).
            "setupCommands": [
                {
                    "description": "Enable pretty-printing for gdb",
                    "text": "-enable-pretty-printing",
                    "ignoreFailures": true
                }
            ],
            "preLaunchTask": "cmake-build",
            "miDebuggerPath": "/usr/bin/gdb"
        }
    ]
}

0 commit comments

Comments
 (0)