Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 15 additions & 2 deletions .github/workflows/integration-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,7 @@ jobs:
echo "Could not find '${INSTRUMENTATION_LIB_DIR}'" ; exit -1
fi
cd python/test/unit
python3 -m pytest -s -n 8 --ignore=hopper/test_flashattention.py --ignore=language/test_line_info.py --ignore=language/test_subprocess.py --ignore=test_debug.py
python3 -m pytest -s -n 8 --ignore=hopper/test_flashattention.py --ignore=language/test_line_info.py --ignore=language/test_subprocess.py --ignore=test_debug.py --ignore=test_address_sanitizer.py
python3 -m pytest -s -n 8 language/test_subprocess.py
python3 -m pytest -s -n 8 test_debug.py --forked
# Run test_line_info.py separately with TRITON_DISABLE_LINE_INFO=0
Expand Down Expand Up @@ -429,14 +429,27 @@ jobs:
cd python/test/unit
pytest --capture=tee-sys -rfs -n 12 language runtime \
--ignore=language/test_line_info.py \
--ignore=test_debug.py
--ignore=test_debug.py \
--ignore=test_address_sanitizer.py
# TODO: uncomment
# pytest --capture=tee-sys -rfs test_debug.py
TRITON_ALWAYS_COMPILE=1 TRITON_DISABLE_LINE_INFO=0 LLVM_PASS_PLUGIN_PATH=${INSTRUMENTATION_LIB_DIR}/libGPUInstrumentationTestLib.so \
pytest --capture=tee-sys -rfs -vvv instrumentation/test_gpuhello.py
# Run test_line_info.py separately with TRITON_DISABLE_LINE_INFO=0
TRITON_DISABLE_LINE_INFO=0 python3 -m pytest -s -n 8 language/test_line_info.py
- name: Run asan tests on HIP
run: |
cd python/test/unit
ulimit -s 1024
export PATH=$(find ~/.triton/llvm -name llvm-symbolizer -printf '%h\n'):$PATH
export LD_LIBRARY_PATH=$(find /opt -name libclang_rt.asan-x86_64.so -printf '%h\n'):$LD_LIBRARY_PATH
export LD_LIBRARY_PATH=$(find /opt -type d -wholename *lib/llvm/lib/asan):$LD_LIBRARY_PATH
export LD_LIBRARY_PATH=$(find /usr -name libcaffe2_nvrtc.so -printf '%h\n'):$LD_LIBRARY_PATH
export CLANG_ASAN_LIB=$(find /opt -name libclang_rt.asan-x86_64.so)
export HIP_ASAN_LIB=$(find /opt -wholename *lib/asan/libamdhip64.so)
ASAN_OPTIONS=detect_leaks=0,alloc_dealloc_mismatch=0 \
LD_PRELOAD=$CLANG_ASAN_LIB:$HIP_ASAN_LIB python3 -m pytest -s test_address_sanitizer.py
- name: Run regression tests
run: |
# Reenable test_functional_regression.py once it's fixed
Expand Down
19 changes: 15 additions & 4 deletions .github/workflows/integration-tests.yml.in
Original file line number Diff line number Diff line change
Expand Up @@ -300,7 +300,7 @@ jobs:
echo "Could not find '${INSTRUMENTATION_LIB_DIR}'" ; exit -1
fi
cd python/test/unit
python3 -m pytest -s -n 8 --ignore=hopper/test_flashattention.py --ignore=language/test_line_info.py --ignore=language/test_subprocess.py --ignore=test_debug.py
python3 -m pytest -s -n 8 --ignore=hopper/test_flashattention.py --ignore=language/test_line_info.py --ignore=language/test_subprocess.py --ignore=test_debug.py --ignore=test_address_sanitizer.py
python3 -m pytest -s -n 8 language/test_subprocess.py
python3 -m pytest -s -n 8 test_debug.py --forked
# Run test_line_info.py separately with TRITON_DISABLE_LINE_INFO=0
Expand All @@ -309,7 +309,6 @@ jobs:
python3 -m pytest -s hopper/test_flashattention.py
TRITON_ALWAYS_COMPILE=1 TRITON_DISABLE_LINE_INFO=0 LLVM_PASS_PLUGIN_PATH=${INSTRUMENTATION_LIB_DIR}/libGPUInstrumentationTestLib.so \
python3 -m pytest --capture=tee-sys -rfs -vvv instrumentation/test_gpuhello.py

- name: Run interpreter tests
if: ${{ matrix.runner[0] == 'h100-runner-set' }}
env:
Expand Down Expand Up @@ -416,15 +415,27 @@ jobs:
cd python/test/unit
pytest --capture=tee-sys -rfs -n 12 language runtime \
--ignore=language/test_line_info.py \
--ignore=test_debug.py
--ignore=test_debug.py \
--ignore=test_address_sanitizer.py
# TODO: uncomment
# pytest --capture=tee-sys -rfs test_debug.py
TRITON_ALWAYS_COMPILE=1 TRITON_DISABLE_LINE_INFO=0 LLVM_PASS_PLUGIN_PATH=${INSTRUMENTATION_LIB_DIR}/libGPUInstrumentationTestLib.so \
pytest --capture=tee-sys -rfs -vvv instrumentation/test_gpuhello.py

# Run test_line_info.py separately with TRITON_DISABLE_LINE_INFO=0
TRITON_DISABLE_LINE_INFO=0 python3 -m pytest -s -n 8 language/test_line_info.py

- name: Run asan tests on HIP
run: |
cd python/test/unit
ulimit -s 1024
export PATH=$(find ~/.triton/llvm -name llvm-symbolizer -printf '%h\n'):$PATH
export LD_LIBRARY_PATH=$(find /opt -name libclang_rt.asan-x86_64.so -printf '%h\n'):$LD_LIBRARY_PATH
export LD_LIBRARY_PATH=$(find /opt -type d -wholename *lib/llvm/lib/asan):$LD_LIBRARY_PATH
export LD_LIBRARY_PATH=$(find /usr -name libcaffe2_nvrtc.so -printf '%h\n'):$LD_LIBRARY_PATH
export CLANG_ASAN_LIB=$(find /opt -name libclang_rt.asan-x86_64.so)
export HIP_ASAN_LIB=$(find /opt -wholename *lib/asan/libamdhip64.so)
ASAN_OPTIONS=detect_leaks=0,alloc_dealloc_mismatch=0 \
LD_PRELOAD=$CLANG_ASAN_LIB:$HIP_ASAN_LIB python3 -m pytest -s test_address_sanitizer.py
- name: Run regression tests
run: |
# Reenable test_functional_regression.py once it's fixed
Expand Down
47 changes: 47 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# This is not the build system, just a helper to run common development commands.
# Make sure to first initialize the build system with:
# make dev-install

PYTHON := python
BUILD_DIR := $(shell cd python; $(PYTHON) -c 'from build_helpers import get_cmake_dir; print(get_cmake_dir())')
TRITON_OPT := $(BUILD_DIR)/bin/triton-opt

.PHONY: all
all:
ninja -C $(BUILD_DIR)

.PHONY: triton-opt
triton-opt:
ninja -C $(BUILD_DIR) triton-opt

.PHONY: test-lit
test-lit:
ninja -C $(BUILD_DIR) check-triton-lit-tests

.PHONY: test-cpp
test-cpp:
ninja -C $(BUILD_DIR) check-triton-unit-tests

.PHONY: test-python
test-python: all
$(PYTHON) -m pytest python/test/unit

.PHONY: test
test: test-lit test-cpp test-python

.PHONY: dev-install
dev-install:
# build-time dependencies
$(PYTHON) -m pip install ninja cmake wheel pybind11
# test dependencies
$(PYTHON) -m pip install scipy numpy torch pytest lit pandas matplotlib
$(PYTHON) -m pip install -e python --no-build-isolation -v

.PHONY: golden-samples
golden-samples: triton-opt
$(TRITON_OPT) test/TritonGPU/samples/simulated-grouped-gemm.mlir.in -tritongpu-loop-scheduling -tritongpu-pipeline -canonicalize | \
$(PYTHON) utils/generate-test-checks.py --source test/TritonGPU/samples/simulated-grouped-gemm.mlir.in --source_delim_regex="\bmodule" \
-o test/TritonGPU/samples/simulated-grouped-gemm.mlir
$(TRITON_OPT) test/TritonGPU/samples/descriptor-matmul-pipeline.mlir.in -tritongpu-loop-scheduling -tritongpu-pipeline -canonicalize | \
$(PYTHON) utils/generate-test-checks.py --source test/TritonGPU/samples/descriptor-matmul-pipeline.mlir.in --source_delim_regex="\bmodule" \
-o test/TritonGPU/samples/descriptor-matmul-pipeline.mlir
42 changes: 15 additions & 27 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -130,36 +130,15 @@ There currently isn't a turnkey way to run all the Triton tests, but you can
follow the following recipe.

```shell
# One-time setup. Note we have to reinstall local Triton because torch
# One-time setup. Note this will reinstall local Triton because torch
# overwrites it with the public version.
$ pip install scipy numpy torch pytest lit pandas matplotlib && pip install -e python
$ make dev-install

# Run Python tests using your local GPU.
$ python3 -m pytest python/test/unit
# To run all tests (requires a GPU)
$ make test

# Move to builddir. Fill in <...> with the full path, e.g.
# `cmake.linux-x86_64-cpython-3.11`.
$ cd python/build/cmake<...>

# Run C++ unit tests.
$ ctest -j32

# Run lit tests.
$ lit test
```

You may find it helpful to make a symlink to the builddir and tell your local
git to ignore it.

```shell
$ ln -s python/build/cmake<...> build
$ echo build >> .git/info/exclude
```

Then you can e.g. rebuild and run lit with the following command.

```shell
$ ninja -C build && ( cd build ; lit test )
# Or, to run tests without a gpu
$ make test-cpp test-lit
```

# Tips for hacking
Expand Down Expand Up @@ -193,6 +172,15 @@ For detailed instructions on how to debug Triton's frontend, please refer to thi
separated values to be specified (eg
`TRITON_LLVM_DEBUG_ONLY="tritongpu-remove-layout-conversions` or
`TRITON_LLVM_DEBUG_ONLY="tritongpu-remove-layout-conversions,regalloc"`).
- `TRITON_ENABLE_ASAN=1` invokes the LLVM address sanitizer for
memory leak and out of bounds access detection. Currently only supported on the AMD
backend. This must be run using the ASAN libraries documented [here](https://rocm.docs.amd.com/projects/llvm-project/en/latest/conceptual/using-gpu-sanitizer.html).

When enabling the address sanitizer it is recommended to disable various memory caching strategies
both within the ROCm stack and PyTorch. This will give the address sanitizer the best chance at finding the
memory fault where it originates. This can be done through the HSA_DISABLE_FRAGMENT_ALLOCATOR, AMD_PYTORCH_NO_CUDA_MEMORY_CACHING,
and PYTORCH_NO_HIP_MEMORY_CACHING environment variables.

- `USE_IR_LOC={ttir,ttgir}` reparses the IR such that the location information
will be the line number of the IR file with that particular extension,
instead of line number of the python file. This can provide a direct mapping
Expand Down
1 change: 1 addition & 0 deletions include/triton/Tools/Sys/GetEnv.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ inline const std::set<std::string> CACHE_INVALIDATING_ENV_VARS = {
"TRITON_HIP_STREAM_PREFETCH",
"TRITON_HIP_USE_BLOCK_PINGPONG",
"TRITON_LLVM_DEBUG_ONLY",
"TRITON_ENABLE_ASAN",
"USE_IR_LOC",
"NVPTX_ENABLE_DUMP",
"TRITON_INTEL_ADVANCED_PATH",
Expand Down
17 changes: 17 additions & 0 deletions python/build_helpers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import os
import sysconfig
import sys
from pathlib import Path


def get_base_dir():
return os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir))


def get_cmake_dir():
plat_name = sysconfig.get_platform()
python_version = sysconfig.get_python_version()
dir_name = f"cmake.{plat_name}-{sys.implementation.name}-{python_version}"
cmake_dir = Path(get_base_dir()) / "python" / "build" / dir_name
cmake_dir.mkdir(parents=True, exist_ok=True)
return cmake_dir
15 changes: 2 additions & 13 deletions python/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@

import pybind11

from build_helpers import get_base_dir, get_cmake_dir


@dataclass
class Backend:
Expand Down Expand Up @@ -345,19 +347,6 @@ def download_and_copy(name, src_path, dst_path, variable, version, url_func):
# ---- cmake extension ----


def get_base_dir():
return os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir))


def get_cmake_dir():
plat_name = sysconfig.get_platform()
python_version = sysconfig.get_python_version()
dir_name = f"cmake.{plat_name}-{sys.implementation.name}-{python_version}"
cmake_dir = Path(get_base_dir()) / "python" / "build" / dir_name
cmake_dir.mkdir(parents=True, exist_ok=True)
return cmake_dir


class CMakeClean(clean):

def initialize_options(self):
Expand Down
17 changes: 16 additions & 1 deletion python/src/llvm.cc
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/IPO/AlwaysInliner.h"
#include "llvm/Transforms/InstCombine/InstCombine.h"
#include "llvm/Transforms/Instrumentation/AddressSanitizer.h"
#include "llvm/Transforms/Instrumentation/AddressSanitizerOptions.h"
#include <csignal>
#include <memory>
#include <pybind11/pybind11.h>
Expand Down Expand Up @@ -217,7 +219,14 @@ void init_triton_llvm(py::module &&m) {
.def("set_calling_conv", &llvm::Function::setCallingConv)
.def("add_fn_attr", [](llvm::Function *fn, std::string &name,
std::string &val) { fn->addFnAttr(name, val); })

.def("add_fn_asan_attr",
[](llvm::Function *fn) {
fn->addFnAttr(llvm::Attribute::SanitizeAddress);
})
.def("add_fn_target_feature",
[](llvm::Function *fn, std::string &val) {
fn->addFnAttr("target-features", val);
})
// Sets the nvvm.maxreg property on the given function.
.def("set_nvvm_maxnreg",
[](llvm::Function *fn, int maxnreg) {
Expand Down Expand Up @@ -377,6 +386,12 @@ void init_triton_llvm(py::module &&m) {
fpm.addPass(BreakStructPhiNodesPass());
fpm.addPass(InstCombinePass());
});
bool enableAddressSanitizer =
mlir::triton::tools::getBoolEnv("TRITON_ENABLE_ASAN");
if (enableAddressSanitizer) {
AddressSanitizerOptions Opts;
mpm.addPass(AddressSanitizerPass(Opts));
}
mpm.addPass(pb.buildPerModuleDefaultPipeline(opt));
mpm.run(*mod, mam);
},
Expand Down
33 changes: 33 additions & 0 deletions python/test/unit/address_sanitizer_helper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
import torch
import triton
import triton.language as tl

size = 4096
x = torch.rand(size, device='cuda')
y = torch.rand(size, device='cuda')
output = torch.empty_like(x)
n_elements = output.numel()
grid = lambda meta: (triton.cdiv(n_elements, meta['BLOCK_SIZE']), )


@triton.jit
def add_kernel(
x_ptr,
y_ptr,
output_ptr,
n_elements,
BLOCK_SIZE: tl.constexpr,
):
pid = tl.program_id(axis=0)
block_start = pid * BLOCK_SIZE
#Set access to go out of bounds for ASAN test
offsets = block_start + tl.arange(0, BLOCK_SIZE) + 1
x = tl.load(x_ptr + offsets)
y = tl.load(y_ptr + offsets)
output = x + y
tl.store(output_ptr + offsets, output)


pgm = add_kernel[grid](x, y, output, n_elements, BLOCK_SIZE=1024)
amdgcn = pgm.asm['amdgcn']
print(amdgcn)
Loading
Loading