Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@ endif()
# Compiler flags
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include)

# Third-party
include_directories(${PYBIND11_INCLUDE_DIR})

if(WIN32)
SET(BUILD_SHARED_LIBS OFF)
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/deps/dlfcn-win32/src)
Expand Down Expand Up @@ -60,7 +63,7 @@ if("${LLVM_LIBRARY_DIR}" STREQUAL "")
# sometimes we don't want to use llvm-config, since it may have been downloaded for some specific linux distros
else()
set(LLVM_LDFLAGS "-L${LLVM_LIBRARY_DIR}")
set(LLVM_LIBRARIES
set(LLVM_LIBRARIES
libLLVMNVPTXCodeGen.a
libLLVMNVPTXDesc.a
libLLVMNVPTXInfo.a
Expand Down
60 changes: 40 additions & 20 deletions lib/codegen/transform/inline.cc
Original file line number Diff line number Diff line change
Expand Up @@ -53,36 +53,56 @@ void inliner::do_inline(ir::function* fn, ir::call_inst* callsite, ir::builder&
for(size_t k = 0; k < fn->args().size(); k++)
arg_map[fn->args()[k]] = callsite->ops()[k];
std::vector<ir::basic_block*> rpo = ir::cfg::reverse_post_order(fn);
// clone instructions
for(size_t i = 0; i < new_blocks.size(); i++){
ir::basic_block* old_block = fn->blocks()[i];
ir::basic_block* new_block = new_blocks[i];
builder.set_insert_point(new_block);
for(ir::instruction* old_inst: old_block->get_inst_list()){
// clone instruction
ir::instruction* new_inst = old_inst->clone();
// replace basic block
for(size_t k = 0; k < new_blocks.size(); k++)
new_inst->replace_uses_of_with(fn->blocks()[k], new_blocks[k]);
// replace values
for(size_t k = 0; k < new_inst->get_num_operands(); k++){
ir::value* op = new_inst->get_operand(k);
if(auto arg_op = dynamic_cast<ir::argument*>(op))
new_inst->set_operand(k, arg_map.at(arg_op));
if(auto inst_op = dynamic_cast<ir::instruction*>(op))
if(inst_map.find(inst_op) != inst_map.end())
new_inst->set_operand(k, inst_map.at(inst_op));
}
// `ret` instruction is a special case:
// instead of returning we need to branch to after the function call
if(ir::return_inst* ret = dynamic_cast<ir::return_inst*>(new_inst)){
if(ir::value* ret_val = ret->get_return_value())
exit_val->add_incoming(ret_val, new_block);
new_inst = ir::branch_inst::create(exit);
}
inst_map[old_inst] = new_inst;
builder.insert(new_inst);
}
}
// update basic blocks
for(size_t i = 0; i < new_blocks.size(); i++) {
for (ir::instruction* new_inst: new_blocks[i]->get_inst_list()) {
// replace basic use cases
for(size_t k = 0; k < new_blocks.size(); k++)
new_inst->replace_uses_of_with(fn->blocks()[k], new_blocks[k]);
if(ir::phi_node* phi = dynamic_cast<ir::phi_node*>(new_inst)) {
// additionally replace basic blocks of phi-nodes since
// replace_uses_of_with() does not replace them.
for(unsigned in = 0; in < phi->get_num_incoming(); in++)
for(size_t k = 0; k < new_blocks.size(); k++)
if (phi->get_incoming_block(in) == fn->blocks()[k])
phi->set_incoming_block(in, new_blocks[k]);
}
}
}
// replace operands of instructions after constructing inst_map
for (auto& it: inst_map) {
ir::instruction* new_inst = it.second;
for(size_t k = 0; k < new_inst->get_num_operands(); k++) {
ir::value* op = new_inst->get_operand(k);
if(auto arg_op = dynamic_cast<ir::argument*>(op))
new_inst->set_operand(k, arg_map.at(arg_op));
if(auto inst_op = dynamic_cast<ir::instruction*>(op))
if(inst_map.find(inst_op) != inst_map.end())
new_inst->set_operand(k, inst_map.at(inst_op));
}
// handles a ret instruction.
// instead of returning we need to branch to after the function call
if(ir::return_inst* ret = dynamic_cast<ir::return_inst*>(new_inst)) {
if(ir::value* ret_val = ret->get_return_value())
exit_val->add_incoming(ret_val, new_inst->get_parent());
// replace ret with branch
ir::instruction* new_br_inst = ir::branch_inst::create(exit);
builder.set_insert_point(new_inst->get_parent());
builder.insert(new_br_inst);
new_inst->erase_from_parent();
}
}
if(exit_val->get_num_incoming() == 1)
exit_val->replace_all_uses_with(exit_val->get_incoming_value(0));
// done -- make sure insert point is properly set to exit block
Expand Down
2 changes: 0 additions & 2 deletions lib/driver/llvm.cc
Original file line number Diff line number Diff line change
Expand Up @@ -239,14 +239,12 @@ std::string ptx_to_cubin(const std::string& ptx, const std::string& ptxas, int c
unlink(_flog);
throw std::runtime_error("Internal Triton PTX codegen error: \n" + log);
}
CUmodule ret;
std::ifstream _cubin(_fbin, std::ios::binary );
std::string cubin(std::istreambuf_iterator<char>(_cubin), {});
_cubin.close();
unlink(_fsrc);
unlink(_flog);
unlink(_fbin);
dispatch::cuModuleLoadData(&ret, cubin.c_str());
return cubin;
}

Expand Down
76 changes: 48 additions & 28 deletions python/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import tarfile
import urllib.request
from distutils.version import LooseVersion
from typing import NamedTuple

from setuptools import Extension, setup
from setuptools.command.build_ext import build_ext
Expand All @@ -28,33 +29,53 @@ def get_build_type():
return "Release"


def get_llvm():
# tries to find system LLVM
def use_system_llvm():
if platform.system() == "Windows":
return True
versions = ['-11.0', '-11', '-11-64']
supported = ['llvm-config{v}'.format(v=v) for v in versions]
paths = [distutils.spawn.find_executable(cfg) for cfg in supported]
paths = [p for p in paths if p is not None]
if paths:
return '', ''
if platform.system() == "Windows":
return '', ''
# download if nothing is installed
name = 'clang+llvm-11.0.1-x86_64-linux-gnu-ubuntu-16.04'
dir = os.path.join(os.environ["HOME"], ".triton", "llvm")
llvm_include_dir = '{dir}/{name}/include'.format(dir=dir, name=name)
llvm_library_dir = '{dir}/{name}/lib'.format(dir=dir, name=name)
if not os.path.exists(llvm_library_dir):
os.makedirs(dir, exist_ok=True)
try:
shutil.rmtree(os.path.join(dir, name))
except Exception:
pass
url = "https://github.com/llvm/llvm-project/releases/download/llvmorg-11.0.1/{name}.tar.xz".format(name=name)
print('downloading and extracting ' + url + '...')
ftpstream = urllib.request.urlopen(url)
file = tarfile.open(fileobj=ftpstream, mode="r|xz")
file.extractall(path=dir)
return llvm_include_dir, llvm_library_dir
return any(p is not None for p in paths)


def get_thirdparty_packages(triton_cache_path):
    """Make sure third-party build dependencies exist and return their CMake flags.

    Every package lives under ``<triton_cache_path>/<package>/<name>``. When the
    package's ``test_file`` is not found there, the package root directory is
    removed, re-created, and the archive at ``url`` is downloaded and extracted
    into it.

    pybind11 is always requested; the prebuilt LLVM archive is requested only
    when ``use_system_llvm()`` returns a falsy value.

    :param triton_cache_path: directory under which packages are cached.
    :return: list of ``-D<FLAG>=<path>`` strings to append to the CMake command line.
    """
    class Package(NamedTuple):
        package: str       # sub-directory of the cache that holds this package
        name: str          # directory expected under the package root after extraction
        url: str           # archive download location
        test_file: str     # path relative to the package dir; its existence means "already installed"
        include_flag: str  # CMake variable set to <package dir>/include ("" to skip)
        lib_flag: str      # CMake variable set to <package dir>/lib ("" to skip)

    packages = [
        Package("pybind11", "pybind11-2.10.0", "https://github.com/pybind/pybind11/archive/refs/tags/v2.10.0.tar.gz", "include/pybind11/pybind11.h", "PYBIND11_INCLUDE_DIR", "")
    ]
    if not use_system_llvm():
        # download LLVM if no suitable system LLVM is installed
        packages.append(
            Package("llvm", "clang+llvm-11.0.1-x86_64-linux-gnu-ubuntu-16.04", "https://github.com/llvm/llvm-project/releases/download/llvmorg-11.0.1/clang+llvm-11.0.1-x86_64-linux-gnu-ubuntu-16.04.tar.xz", "lib", "LLVM_INCLUDE_DIRS", "LLVM_LIBRARY_DIR")
        )

    thirdparty_cmake_args = []
    for p in packages:
        package_root_dir = os.path.join(triton_cache_path, p.package)
        package_dir = os.path.join(package_root_dir, p.name)
        test_file_path = os.path.join(package_dir, p.test_file)
        if not os.path.exists(test_file_path):
            # Start from a clean directory; failing to remove it (e.g. because
            # it does not exist yet) is deliberately not treated as an error.
            shutil.rmtree(package_root_dir, ignore_errors=True)
            os.makedirs(package_root_dir, exist_ok=True)
            print('downloading and extracting {} ...'.format(p.url))
            # Context managers close the HTTP response and the archive even if
            # extraction fails part-way through.
            with urllib.request.urlopen(p.url) as response:
                # "r|*" reads the archive as a non-seekable stream with
                # transparent compression detection.
                with tarfile.open(fileobj=response, mode="r|*") as archive:
                    # NOTE(review): extractall() trusts member paths in the
                    # downloaded archive; consider validating them.
                    archive.extractall(path=package_root_dir)
        if p.include_flag:
            thirdparty_cmake_args.append("-D{}={}/include".format(p.include_flag, package_dir))
        if p.lib_flag:
            thirdparty_cmake_args.append("-D{}={}/lib".format(p.lib_flag, package_dir))
    return thirdparty_cmake_args


class CMakeExtension(Extension):
Expand Down Expand Up @@ -92,7 +113,8 @@ def run(self):
self.build_extension(ext)

def build_extension(self, ext):
llvm_include_dir, llvm_library_dir = get_llvm()
triton_cache_path = os.path.join(os.environ["HOME"], ".triton")
thirdparty_cmake_args = get_thirdparty_packages(triton_cache_path)
extdir = os.path.abspath(os.path.dirname(self.get_ext_fullpath(ext.path)))
# create build directories
if not os.path.exists(self.build_temp):
Expand All @@ -103,12 +125,10 @@ def build_extension(self, ext):
"-DCMAKE_LIBRARY_OUTPUT_DIRECTORY=" + extdir,
"-DBUILD_TUTORIALS=OFF",
"-DBUILD_PYTHON_MODULE=ON",
"-DLLVM_INCLUDE_DIRS=" + llvm_include_dir,
"-DLLVM_LIBRARY_DIR=" + llvm_library_dir,
# '-DPYTHON_EXECUTABLE=' + sys.executable,
# '-DCMAKE_VERBOSE_MAKEFILE:BOOL=ON',
"-DPYTHON_INCLUDE_DIRS=" + ";".join(python_include_dirs)
]
] + thirdparty_cmake_args
# configuration
cfg = get_build_type()
build_args = ["--config", cfg]
Expand Down
Loading