diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index f26cf764d00..91ffa9427e5 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -329,6 +329,31 @@ if(NOT BUILD_SHARED_LIBS) endif() endif() +set(CUDF_LTO_FLAGS -fatbin --relocatable-device-code=true --gen-opt-lto --ltoir -dlto) +set(CUDF_LTO_CXX_STANDARD 20) + +function(add_lto_ir_target target_name source_file) + get_target_property(libcudacxx_raw_includes CCCL::libcudacxx INTERFACE_INCLUDE_DIRECTORIES) + set(includes) + foreach(inc IN LISTS libcudacxx_raw_includes CUDAToolkit_INCLUDE_DIRS) + list(APPEND includes "-I${inc}") + endforeach() + set(SOURCE_FILE ${CMAKE_CURRENT_SOURCE_DIR}/${source_file}) + add_custom_target( + ${target_name} + COMMAND + ${CMAKE_CUDA_COMPILER} -std=c++${CUDF_LTO_CXX_STANDARD} ${CUDF_LTO_FLAGS} + -I${CUDF_SOURCE_DIR}/include -I${CUDF_SOURCE_DIR}/src -I${CMAKE_CURRENT_SOURCE_DIR} ${includes} -D__CUDACC_RTC_ -DCUDF_RUNTIME_JIT -x cu -c + ${SOURCE_FILE} -o ${target_name} + COMMENT "Generating LTO fatbin ${target_name}: ${SOURCE_FILE}" + WORKING_DIRECTORY ${CMAKE_BINARY_DIR} + DEPENDS ${SOURCE_FILE} + VERBATIM + ) +endfunction() + +add_lto_ir_target(cudf_lto_kernel.fatbin src/transform/jit/lto_def.cu) + # ################################################################################################## # * library targets ------------------------------------------------------------------------------- add_library( @@ -814,6 +839,7 @@ add_library( src/transform/nans_to_nulls.cu src/transform/one_hot_encode.cu src/transform/row_bit_count.cu + src/transform/jit/launch.cpp src/transform/transform.cpp src/transpose/transpose.cu src/unary/cast_ops.cu @@ -835,6 +861,8 @@ add_library( src/utilities/type_dispatcher.cpp ) +add_dependencies(cudf cudf_lto_kernel.fatbin) + # Anything that includes jitify needs to be compiled with _FILE_OFFSET_BITS=64 due to a limitation # in how conda builds glibc set_source_files_properties( diff --git a/cpp/src/transform/jit/launch.cpp b/cpp/src/transform/jit/launch.cpp new file mode 100644 index 00000000000..fdc928c2de5 --- /dev/null +++ b/cpp/src/transform/jit/launch.cpp @@ -0,0 +1,28 @@ + + +#include + +#include +#include +#include + +#define CHECK_JITLINK_ERROR(...) \ + if (auto err = __VA_ARGS__; err != NVJITLINK_SUCCESS) { \ + fprintf(stderr, "JITLink error: %d\n", err); \ + exit(EXIT_FAILURE); \ + } + +void load_jit_kernel() +{ + const char* smbuf = "-arch=sm_86"; + nvJitLinkHandle handle; + const char* lopts[] = {"-lto", smbuf}; + CHECK_JITLINK_ERROR(nvJitLinkCreate(&handle, 2, lopts)); + CHECK_JITLINK_ERROR(nvJitLinkAddFile( + handle, NVJITLINK_INPUT_FATBIN, "/home/coder/cudf/cpp/build/cudf_lto_operation.fatbin")); + CHECK_JITLINK_ERROR(nvJitLinkAddFile( + handle, NVJITLINK_INPUT_FATBIN, "/home/coder/cudf/cpp/build/cudf_lto_operators.fatbin")); + CHECK_JITLINK_ERROR(nvJitLinkAddFile( + handle, NVJITLINK_INPUT_FATBIN, "/home/coder/cudf/cpp/build/cudf_lto_kernel.fatbin")); + CHECK_JITLINK_ERROR(nvJitLinkComplete(handle)); +} diff --git a/cpp/src/transform/jit/lto_decl.cuh b/cpp/src/transform/jit/lto_decl.cuh new file mode 100644 index 00000000000..d3099d5212e --- /dev/null +++ b/cpp/src/transform/jit/lto_decl.cuh @@ -0,0 +1,832 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ +#pragma once +#include + +namespace CUDF_EXPORT cudf { + +namespace lto { + +/** + * @brief LTO-JIT functions and thunk types + * + * These are declarations for functions that will be used in LTO-JIT compiled code. + * They are pre-compiled into a device library that is linked at JIT compile time. + * This header should be minimal and only contain necessary types and function declarations as it + * will be included and compiled at JIT compile time. Including other headers will lead to longer + * JIT compile times which can be unbounded and cause slowdowns. + * + */ + +using size_type = int32_t; + +using bitmask_type = uint32_t; + +enum class type_id : int32_t {}; + +enum scale_type : int32_t {}; + +struct __attribute__((may_alias)) data_type { + type_id __id = {}; + int32_t __fixed_point_scale = 0; +}; + +struct __attribute__((may_alias)) string_view { + char const* __data = nullptr; + size_type __bytes = 0; + mutable size_type __length = 0; +}; + +struct __attribute__((may_alias)) decimal32 { + int32_t __value = 0; + scale_type __scale = scale_type{}; +}; + +struct __attribute__((may_alias)) decimal64 { + int64_t __value = 0; + scale_type __scale = scale_type{}; +}; + +struct __attribute__((may_alias)) decimal128 { + __int128_t __value = 0; + scale_type __scale = scale_type{}; +}; + +struct __attribute__((may_alias)) timestamp_D { + int32_t __rep = 0; +}; + +struct __attribute__((may_alias)) timestamp_h { + int32_t __rep = 0; +}; + +struct __attribute__((may_alias)) timestamp_m { + int32_t __rep = 0; +}; + +struct __attribute__((may_alias)) timestamp_s { + int64_t __rep = 0; +}; + +struct __attribute__((may_alias)) timestamp_ms { + int64_t __rep = 0; +}; + +struct __attribute__((may_alias)) timestamp_us { + int64_t __rep = 0; +}; + +struct __attribute__((may_alias)) timestamp_ns { + int64_t __rep = 0; +}; + +struct __attribute__((may_alias)) duration_D { + int32_t __rep = 0; +}; + +struct __attribute__((may_alias)) duration_h { + int32_t __rep = 0; +}; + +struct __attribute__((may_alias)) duration_m { + int32_t __rep = 0; +}; + +struct __attribute__((may_alias)) duration_s { + int64_t __rep = 0; +}; + +struct __attribute__((may_alias)) duration_ms { + int64_t __rep = 0; +}; + +struct __attribute__((may_alias)) duration_us { + int64_t __rep = 0; +}; + +struct __attribute__((may_alias)) duration_ns { + int64_t __rep = 0; +}; + +struct inplace_t {}; + +inline constexpr inplace_t inplace{}; + +// [ ] assumes T is trivially copyable +template +struct __attribute__((may_alias)) optional { + T __val; + bool __engaged; + + __device__ constexpr optional() : __val{}, __engaged{false} {} + + template + __device__ constexpr optional(inplace_t, Args&&... args) + : __val{static_cast(args)...}, __engaged{true} + { + } + + __device__ constexpr optional(T val) : __val{val}, __engaged{true} {} + + constexpr optional(optional const&) = default; + + constexpr optional(optional&&) = default; + + constexpr optional& operator=(optional const&) = default; + + constexpr optional& operator=(optional&&) = default; + + constexpr ~optional() = default; + + __device__ constexpr bool has_value() const { return __engaged; } + + __device__ constexpr void reset() { __engaged = false; } + + __device__ constexpr T const& get() const { return __val; } + + __device__ constexpr T& get() { return __val; } + + __device__ constexpr T const* operator->() const { return &__val; } + + __device__ constexpr T* operator->() { return &__val; } + + __device__ constexpr T const& operator*() const { return __val; } + + __device__ constexpr T& operator*() { return __val; } + + __device__ constexpr T const& value() const { return __val; } + + __device__ constexpr T& value() { return __val; } + + __device__ constexpr explicit operator bool() const { return __engaged; } + + __device__ constexpr T value_or(T __v) const { return __engaged ? __val : __v; } +}; + +template +optional(T) -> optional; + +struct alignas(16) __attribute__((may_alias)) column_device_view_core { + data_type __type = {}; + size_type __size = 0; + void const* __data = nullptr; + bitmask_type const* __null_mask = nullptr; + size_type __offset = 0; + column_device_view_core* __d_children = nullptr; + size_type __num_children = 0; + + template + __device__ T const* head() const; + + __device__ size_type size() const; + + __device__ bool nullable() const; + + __device__ bitmask_type const* null_mask() const; + + __device__ size_type offset() const; + + __device__ bool is_valid(size_type idx) const; + + __device__ bool is_valid_nocheck(size_type idx) const; + + __device__ bool is_null(size_type idx) const; + + __device__ bool is_null_nocheck(size_type idx) const; + + __device__ bitmask_type get_mask_word(size_type word_index) const; + + template + __device__ T element(size_type idx) const; + + __device__ size_type num_child_columns() const; +}; + +#define CUDF_LTO_DECL(Type) \ + extern template __device__ Type const* column_device_view_core::head() const; + +CUDF_LTO_DECL(bool) +CUDF_LTO_DECL(int8_t) +CUDF_LTO_DECL(int16_t) +CUDF_LTO_DECL(int32_t) +CUDF_LTO_DECL(int64_t) +CUDF_LTO_DECL(uint8_t) +CUDF_LTO_DECL(uint16_t) +CUDF_LTO_DECL(uint32_t) +CUDF_LTO_DECL(uint64_t) +CUDF_LTO_DECL(float) +CUDF_LTO_DECL(double) +CUDF_LTO_DECL(string_view) +CUDF_LTO_DECL(timestamp_D) +CUDF_LTO_DECL(timestamp_h) +CUDF_LTO_DECL(timestamp_m) +CUDF_LTO_DECL(timestamp_s) +CUDF_LTO_DECL(timestamp_ms) +CUDF_LTO_DECL(timestamp_us) +CUDF_LTO_DECL(timestamp_ns) +CUDF_LTO_DECL(duration_D) +CUDF_LTO_DECL(duration_h) +CUDF_LTO_DECL(duration_m) +CUDF_LTO_DECL(duration_s) +CUDF_LTO_DECL(duration_ms) +CUDF_LTO_DECL(duration_us) +CUDF_LTO_DECL(duration_ns) + +#undef CUDF_LTO_DECL + +#define CUDF_LTO_DECL(Type) \ + extern template __device__ Type column_device_view_core::element(size_type idx) const; + +CUDF_LTO_DECL(bool) +CUDF_LTO_DECL(int8_t) +CUDF_LTO_DECL(int16_t) +CUDF_LTO_DECL(int32_t) +CUDF_LTO_DECL(int64_t) +CUDF_LTO_DECL(uint8_t) +CUDF_LTO_DECL(uint16_t) +CUDF_LTO_DECL(uint32_t) +CUDF_LTO_DECL(uint64_t) +CUDF_LTO_DECL(decimal32) +CUDF_LTO_DECL(decimal64) +CUDF_LTO_DECL(decimal128) +CUDF_LTO_DECL(float) +CUDF_LTO_DECL(double) +CUDF_LTO_DECL(string_view) +CUDF_LTO_DECL(timestamp_D) +CUDF_LTO_DECL(timestamp_h) +CUDF_LTO_DECL(timestamp_m) +CUDF_LTO_DECL(timestamp_s) +CUDF_LTO_DECL(timestamp_ms) +CUDF_LTO_DECL(timestamp_us) +CUDF_LTO_DECL(timestamp_ns) +CUDF_LTO_DECL(duration_D) +CUDF_LTO_DECL(duration_h) +CUDF_LTO_DECL(duration_m) +CUDF_LTO_DECL(duration_s) +CUDF_LTO_DECL(duration_ms) +CUDF_LTO_DECL(duration_us) +CUDF_LTO_DECL(duration_ns) + +#undef CUDF_LTO_DECL + +struct alignas(16) __attribute__((may_alias)) mutable_column_device_view_core { + data_type __type = {}; + size_type __size = 0; + void const* __data = nullptr; + bitmask_type const* __null_mask = nullptr; + size_type __offset = 0; + mutable_column_device_view_core* __d_children = nullptr; + size_type __num_children = 0; + + template + __device__ T* head() const; + + __device__ size_type size() const; + + __device__ bool nullable() const; + + __device__ bitmask_type* null_mask() const; + + __device__ size_type offset() const; + + __device__ bool is_valid(size_type idx) const; + + __device__ bool is_valid_nocheck(size_type idx) const; + + __device__ bool is_null(size_type idx) const; + + __device__ bool is_null_nocheck(size_type idx) const; + + __device__ bitmask_type get_mask_word(size_type word_index) const; + + template + __device__ T element(size_type idx) const; + + __device__ size_type num_child_columns() const; +}; + +#define CUDF_LTO_DECL(Type) \ + extern template __device__ Type* mutable_column_device_view_core::head() const; + +CUDF_LTO_DECL(bool) +CUDF_LTO_DECL(int8_t) +CUDF_LTO_DECL(int16_t) +CUDF_LTO_DECL(int32_t) +CUDF_LTO_DECL(int64_t) +CUDF_LTO_DECL(uint8_t) +CUDF_LTO_DECL(uint16_t) +CUDF_LTO_DECL(uint32_t) +CUDF_LTO_DECL(uint64_t) +CUDF_LTO_DECL(float) +CUDF_LTO_DECL(double) +CUDF_LTO_DECL(string_view) +CUDF_LTO_DECL(timestamp_D) +CUDF_LTO_DECL(timestamp_h) +CUDF_LTO_DECL(timestamp_m) +CUDF_LTO_DECL(timestamp_s) +CUDF_LTO_DECL(timestamp_ms) +CUDF_LTO_DECL(timestamp_us) +CUDF_LTO_DECL(timestamp_ns) +CUDF_LTO_DECL(duration_D) +CUDF_LTO_DECL(duration_h) +CUDF_LTO_DECL(duration_m) +CUDF_LTO_DECL(duration_s) +CUDF_LTO_DECL(duration_ms) +CUDF_LTO_DECL(duration_us) +CUDF_LTO_DECL(duration_ns) + +#undef CUDF_LTO_DECL + +#define CUDF_LTO_DECL(Type) \ + extern template __device__ Type mutable_column_device_view_core::element(size_type idx) \ + const; + +CUDF_LTO_DECL(bool) +CUDF_LTO_DECL(int8_t) +CUDF_LTO_DECL(int16_t) +CUDF_LTO_DECL(int32_t) +CUDF_LTO_DECL(int64_t) +CUDF_LTO_DECL(uint8_t) +CUDF_LTO_DECL(uint16_t) +CUDF_LTO_DECL(uint32_t) +CUDF_LTO_DECL(uint64_t) +CUDF_LTO_DECL(decimal32) +CUDF_LTO_DECL(decimal64) +CUDF_LTO_DECL(decimal128) +CUDF_LTO_DECL(float) +CUDF_LTO_DECL(double) +CUDF_LTO_DECL(string_view) +CUDF_LTO_DECL(timestamp_D) +CUDF_LTO_DECL(timestamp_h) +CUDF_LTO_DECL(timestamp_m) +CUDF_LTO_DECL(timestamp_s) +CUDF_LTO_DECL(timestamp_ms) +CUDF_LTO_DECL(timestamp_us) +CUDF_LTO_DECL(timestamp_ns) +CUDF_LTO_DECL(duration_D) +CUDF_LTO_DECL(duration_h) +CUDF_LTO_DECL(duration_m) +CUDF_LTO_DECL(duration_s) +CUDF_LTO_DECL(duration_ms) +CUDF_LTO_DECL(duration_us) +CUDF_LTO_DECL(duration_ns) + +#undef CUDF_LTO_DECL + +namespace operators { + +#define CUDF_LTO_DECL(op, type) \ + extern __device__ void op(type* out, type const* a, type const* b); \ + \ + extern __device__ void op(optional* out, optional const* a, optional const* b) + +CUDF_LTO_DECL(add, int32_t); +CUDF_LTO_DECL(add, int64_t); +CUDF_LTO_DECL(add, uint32_t); +CUDF_LTO_DECL(add, uint64_t); +CUDF_LTO_DECL(add, float); +CUDF_LTO_DECL(add, double); +CUDF_LTO_DECL(add, decimal32); +CUDF_LTO_DECL(add, decimal64); +CUDF_LTO_DECL(add, decimal128); +CUDF_LTO_DECL(add, duration_D); +CUDF_LTO_DECL(add, duration_s); +CUDF_LTO_DECL(add, duration_ms); +CUDF_LTO_DECL(add, duration_ns); + +CUDF_LTO_DECL(sub, int32_t); +CUDF_LTO_DECL(sub, int64_t); +CUDF_LTO_DECL(sub, uint32_t); +CUDF_LTO_DECL(sub, uint64_t); +CUDF_LTO_DECL(sub, float); +CUDF_LTO_DECL(sub, double); +CUDF_LTO_DECL(sub, decimal32); +CUDF_LTO_DECL(sub, decimal64); +CUDF_LTO_DECL(sub, decimal128); +CUDF_LTO_DECL(sub, duration_D); +CUDF_LTO_DECL(sub, duration_s); +CUDF_LTO_DECL(sub, duration_ms); +CUDF_LTO_DECL(sub, duration_ns); + +CUDF_LTO_DECL(mul, int32_t); +CUDF_LTO_DECL(mul, int64_t); +CUDF_LTO_DECL(mul, uint32_t); +CUDF_LTO_DECL(mul, uint64_t); +CUDF_LTO_DECL(mul, float); +CUDF_LTO_DECL(mul, double); +CUDF_LTO_DECL(mul, decimal32); +CUDF_LTO_DECL(mul, decimal64); +CUDF_LTO_DECL(mul, decimal128); + +CUDF_LTO_DECL(div, int32_t); +CUDF_LTO_DECL(div, int64_t); +CUDF_LTO_DECL(div, uint32_t); +CUDF_LTO_DECL(div, uint64_t); +CUDF_LTO_DECL(div, float); +CUDF_LTO_DECL(div, double); +CUDF_LTO_DECL(div, decimal32); +CUDF_LTO_DECL(div, decimal64); +CUDF_LTO_DECL(div, decimal128); + +CUDF_LTO_DECL(mod, float); +CUDF_LTO_DECL(mod, double); + +CUDF_LTO_DECL(pymod, float); +CUDF_LTO_DECL(pymod, double); + +CUDF_LTO_DECL(pow, float); +CUDF_LTO_DECL(pow, double); + +#undef CUDF_LTO_DECL + +#define CUDF_LTO_DECL(op, type) \ + extern __device__ void op(bool* out, type const* a, type const* b); \ + \ + extern __device__ void op(bool* out, optional const* a, optional const* b); \ + \ + extern __device__ void op(optional* out, optional const* a, optional const* b) + +CUDF_LTO_DECL(equal, bool); +CUDF_LTO_DECL(equal, int8_t); +CUDF_LTO_DECL(equal, int16_t); +CUDF_LTO_DECL(equal, int32_t); +CUDF_LTO_DECL(equal, int64_t); +CUDF_LTO_DECL(equal, uint8_t); +CUDF_LTO_DECL(equal, uint16_t); +CUDF_LTO_DECL(equal, uint32_t); +CUDF_LTO_DECL(equal, uint64_t); +CUDF_LTO_DECL(equal, float); +CUDF_LTO_DECL(equal, double); +CUDF_LTO_DECL(equal, decimal32); +CUDF_LTO_DECL(equal, decimal64); +CUDF_LTO_DECL(equal, decimal128); +CUDF_LTO_DECL(equal, timestamp_D); +CUDF_LTO_DECL(equal, timestamp_s); +CUDF_LTO_DECL(equal, timestamp_ms); +CUDF_LTO_DECL(equal, timestamp_us); +CUDF_LTO_DECL(equal, timestamp_ns); +CUDF_LTO_DECL(equal, duration_D); +CUDF_LTO_DECL(equal, duration_s); +CUDF_LTO_DECL(equal, duration_ms); +CUDF_LTO_DECL(equal, duration_ns); +CUDF_LTO_DECL(equal, string_view); + +CUDF_LTO_DECL(null_equal, bool); +CUDF_LTO_DECL(null_equal, int8_t); +CUDF_LTO_DECL(null_equal, int16_t); +CUDF_LTO_DECL(null_equal, int32_t); +CUDF_LTO_DECL(null_equal, int64_t); +CUDF_LTO_DECL(null_equal, uint8_t); +CUDF_LTO_DECL(null_equal, uint16_t); +CUDF_LTO_DECL(null_equal, uint32_t); +CUDF_LTO_DECL(null_equal, uint64_t); +CUDF_LTO_DECL(null_equal, float); +CUDF_LTO_DECL(null_equal, double); +CUDF_LTO_DECL(null_equal, decimal32); +CUDF_LTO_DECL(null_equal, decimal64); +CUDF_LTO_DECL(null_equal, decimal128); +CUDF_LTO_DECL(null_equal, timestamp_D); +CUDF_LTO_DECL(null_equal, timestamp_s); +CUDF_LTO_DECL(null_equal, timestamp_ms); +CUDF_LTO_DECL(null_equal, timestamp_us); +CUDF_LTO_DECL(null_equal, timestamp_ns); +CUDF_LTO_DECL(null_equal, duration_D); +CUDF_LTO_DECL(null_equal, duration_s); +CUDF_LTO_DECL(null_equal, duration_ms); +CUDF_LTO_DECL(null_equal, duration_ns); +CUDF_LTO_DECL(null_equal, string_view); + +#undef CUDF_LTO_DECL + +#define CUDF_LTO_DECL(op, type) \ + extern __device__ void op(bool* out, type const* a, type const* b); \ + \ + extern __device__ void op(optional* out, optional const* a, optional const* b) + +CUDF_LTO_DECL(less, bool); +CUDF_LTO_DECL(less, int8_t); +CUDF_LTO_DECL(less, int16_t); +CUDF_LTO_DECL(less, int32_t); +CUDF_LTO_DECL(less, int64_t); +CUDF_LTO_DECL(less, uint8_t); +CUDF_LTO_DECL(less, uint16_t); +CUDF_LTO_DECL(less, uint32_t); +CUDF_LTO_DECL(less, uint64_t); +CUDF_LTO_DECL(less, float); +CUDF_LTO_DECL(less, double); +CUDF_LTO_DECL(less, decimal32); +CUDF_LTO_DECL(less, decimal64); +CUDF_LTO_DECL(less, decimal128); +CUDF_LTO_DECL(less, timestamp_D); +CUDF_LTO_DECL(less, timestamp_s); +CUDF_LTO_DECL(less, timestamp_ms); +CUDF_LTO_DECL(less, timestamp_us); +CUDF_LTO_DECL(less, timestamp_ns); +CUDF_LTO_DECL(less, duration_D); +CUDF_LTO_DECL(less, duration_s); +CUDF_LTO_DECL(less, duration_ms); +CUDF_LTO_DECL(less, duration_ns); +CUDF_LTO_DECL(less, string_view); + +CUDF_LTO_DECL(greater, bool); +CUDF_LTO_DECL(greater, int8_t); +CUDF_LTO_DECL(greater, int16_t); +CUDF_LTO_DECL(greater, int32_t); +CUDF_LTO_DECL(greater, int64_t); +CUDF_LTO_DECL(greater, uint8_t); +CUDF_LTO_DECL(greater, uint16_t); +CUDF_LTO_DECL(greater, uint32_t); +CUDF_LTO_DECL(greater, uint64_t); +CUDF_LTO_DECL(greater, float); +CUDF_LTO_DECL(greater, double); +CUDF_LTO_DECL(greater, decimal32); +CUDF_LTO_DECL(greater, decimal64); +CUDF_LTO_DECL(greater, decimal128); +CUDF_LTO_DECL(greater, timestamp_D); +CUDF_LTO_DECL(greater, timestamp_s); +CUDF_LTO_DECL(greater, timestamp_ms); +CUDF_LTO_DECL(greater, timestamp_us); +CUDF_LTO_DECL(greater, timestamp_ns); +CUDF_LTO_DECL(greater, duration_D); +CUDF_LTO_DECL(greater, duration_s); +CUDF_LTO_DECL(greater, duration_ms); +CUDF_LTO_DECL(greater, duration_ns); +CUDF_LTO_DECL(greater, string_view); + +CUDF_LTO_DECL(less_equal, bool); +CUDF_LTO_DECL(less_equal, int8_t); +CUDF_LTO_DECL(less_equal, int16_t); +CUDF_LTO_DECL(less_equal, int32_t); +CUDF_LTO_DECL(less_equal, int64_t); +CUDF_LTO_DECL(less_equal, uint8_t); +CUDF_LTO_DECL(less_equal, uint16_t); +CUDF_LTO_DECL(less_equal, uint32_t); +CUDF_LTO_DECL(less_equal, uint64_t); +CUDF_LTO_DECL(less_equal, float); +CUDF_LTO_DECL(less_equal, double); +CUDF_LTO_DECL(less_equal, decimal32); +CUDF_LTO_DECL(less_equal, decimal64); +CUDF_LTO_DECL(less_equal, decimal128); +CUDF_LTO_DECL(less_equal, timestamp_D); +CUDF_LTO_DECL(less_equal, timestamp_s); +CUDF_LTO_DECL(less_equal, timestamp_ms); +CUDF_LTO_DECL(less_equal, timestamp_us); +CUDF_LTO_DECL(less_equal, timestamp_ns); +CUDF_LTO_DECL(less_equal, duration_D); +CUDF_LTO_DECL(less_equal, duration_s); +CUDF_LTO_DECL(less_equal, duration_ms); +CUDF_LTO_DECL(less_equal, duration_ns); +CUDF_LTO_DECL(less_equal, string_view); + +CUDF_LTO_DECL(greater_equal, bool); +CUDF_LTO_DECL(greater_equal, int8_t); +CUDF_LTO_DECL(greater_equal, int16_t); +CUDF_LTO_DECL(greater_equal, int32_t); +CUDF_LTO_DECL(greater_equal, int64_t); +CUDF_LTO_DECL(greater_equal, uint8_t); +CUDF_LTO_DECL(greater_equal, uint16_t); +CUDF_LTO_DECL(greater_equal, uint32_t); +CUDF_LTO_DECL(greater_equal, uint64_t); +CUDF_LTO_DECL(greater_equal, float); +CUDF_LTO_DECL(greater_equal, double); +CUDF_LTO_DECL(greater_equal, decimal32); +CUDF_LTO_DECL(greater_equal, decimal64); +CUDF_LTO_DECL(greater_equal, decimal128); +CUDF_LTO_DECL(greater_equal, timestamp_D); +CUDF_LTO_DECL(greater_equal, timestamp_s); +CUDF_LTO_DECL(greater_equal, timestamp_ms); +CUDF_LTO_DECL(greater_equal, timestamp_us); +CUDF_LTO_DECL(greater_equal, timestamp_ns); +CUDF_LTO_DECL(greater_equal, duration_D); +CUDF_LTO_DECL(greater_equal, duration_s); +CUDF_LTO_DECL(greater_equal, duration_ms); +CUDF_LTO_DECL(greater_equal, duration_ns); +CUDF_LTO_DECL(greater_equal, string_view); + +#undef CUDF_LTO_DECL + +#define CUDF_LTO_DECL(op, type) \ + extern __device__ void op(type* out, type const* a, type const* b); \ + \ + extern __device__ void op(optional* out, optional const* a, optional const* b) + +CUDF_LTO_DECL(bitwise_and, int32_t); +CUDF_LTO_DECL(bitwise_and, int64_t); +CUDF_LTO_DECL(bitwise_and, uint32_t); +CUDF_LTO_DECL(bitwise_and, uint64_t); + +CUDF_LTO_DECL(bitwise_or, int32_t); +CUDF_LTO_DECL(bitwise_or, int64_t); +CUDF_LTO_DECL(bitwise_or, uint32_t); +CUDF_LTO_DECL(bitwise_or, uint64_t); + +CUDF_LTO_DECL(bitwise_xor, int32_t); +CUDF_LTO_DECL(bitwise_xor, int64_t); +CUDF_LTO_DECL(bitwise_xor, uint32_t); +CUDF_LTO_DECL(bitwise_xor, uint64_t); + +#undef CUDF_LTO_DECL + +#define CUDF_LTO_DECL(op, type) \ + extern __device__ void op(type* out, type const* a, type const* b); \ + \ + extern __device__ void op(optional* out, optional const* a, optional const* b); + +CUDF_LTO_DECL(logical_and, bool); + +CUDF_LTO_DECL(null_logical_and, bool); + +CUDF_LTO_DECL(logical_or, bool); + +CUDF_LTO_DECL(null_logical_or, bool); + +#undef CUDF_LTO_DECL + +#define CUDF_LTO_DECL(op, type) \ + extern __device__ void op(type* out, type const* a); \ + \ + extern __device__ void op(optional* out, optional const* a) + +CUDF_LTO_DECL(identity, bool); +CUDF_LTO_DECL(identity, int8_t); +CUDF_LTO_DECL(identity, int16_t); +CUDF_LTO_DECL(identity, int32_t); +CUDF_LTO_DECL(identity, int64_t); +CUDF_LTO_DECL(identity, uint8_t); +CUDF_LTO_DECL(identity, uint16_t); +CUDF_LTO_DECL(identity, uint32_t); +CUDF_LTO_DECL(identity, uint64_t); +CUDF_LTO_DECL(identity, float); +CUDF_LTO_DECL(identity, double); +CUDF_LTO_DECL(identity, decimal32); +CUDF_LTO_DECL(identity, decimal64); +CUDF_LTO_DECL(identity, decimal128); +CUDF_LTO_DECL(identity, timestamp_D); +CUDF_LTO_DECL(identity, timestamp_s); +CUDF_LTO_DECL(identity, timestamp_ms); +CUDF_LTO_DECL(identity, timestamp_us); +CUDF_LTO_DECL(identity, timestamp_ns); +CUDF_LTO_DECL(identity, duration_D); +CUDF_LTO_DECL(identity, duration_s); +CUDF_LTO_DECL(identity, duration_ms); +CUDF_LTO_DECL(identity, duration_ns); +CUDF_LTO_DECL(identity, string_view); + +CUDF_LTO_DECL(sin, float); +CUDF_LTO_DECL(sin, double); + +CUDF_LTO_DECL(cos, float); +CUDF_LTO_DECL(cos, double); + +CUDF_LTO_DECL(tan, float); +CUDF_LTO_DECL(tan, double); + +CUDF_LTO_DECL(arcsin, float); +CUDF_LTO_DECL(arcsin, double); + +CUDF_LTO_DECL(arccos, float); +CUDF_LTO_DECL(arccos, double); + +CUDF_LTO_DECL(arctan, float); +CUDF_LTO_DECL(arctan, double); + +CUDF_LTO_DECL(sinh, float); +CUDF_LTO_DECL(sinh, double); + +CUDF_LTO_DECL(cosh, float); +CUDF_LTO_DECL(cosh, double); + +CUDF_LTO_DECL(tanh, float); +CUDF_LTO_DECL(tanh, double); + +CUDF_LTO_DECL(arcsinh, float); +CUDF_LTO_DECL(arcsinh, double); + +CUDF_LTO_DECL(arccosh, float); +CUDF_LTO_DECL(arccosh, double); + +CUDF_LTO_DECL(arctanh, float); +CUDF_LTO_DECL(arctanh, double); + +CUDF_LTO_DECL(exp, float); +CUDF_LTO_DECL(exp, double); + +CUDF_LTO_DECL(log, float); +CUDF_LTO_DECL(log, double); + +CUDF_LTO_DECL(cbrt, float); +CUDF_LTO_DECL(cbrt, double); + +CUDF_LTO_DECL(ceil, float); +CUDF_LTO_DECL(ceil, double); + +CUDF_LTO_DECL(floor, float); +CUDF_LTO_DECL(floor, double); + +CUDF_LTO_DECL(abs, int32_t); +CUDF_LTO_DECL(abs, int64_t); +CUDF_LTO_DECL(abs, float); +CUDF_LTO_DECL(abs, double); + +CUDF_LTO_DECL(rint, float); +CUDF_LTO_DECL(rint, double); + +CUDF_LTO_DECL(bit_invert, uint32_t); +CUDF_LTO_DECL(bit_invert, uint64_t); +CUDF_LTO_DECL(bit_invert, int32_t); +CUDF_LTO_DECL(bit_invert, int64_t); + +#undef CUDF_LTO_DECL + +#define CUDF_LTO_DECL(op, ret_type, type) \ + extern __device__ void op(ret_type* out, type const* a); \ + \ + extern __device__ void op(optional* out, optional const* a) + +CUDF_LTO_DECL(cast_to_int64, int64_t, bool); +CUDF_LTO_DECL(cast_to_int64, int64_t, int8_t); +CUDF_LTO_DECL(cast_to_int64, int64_t, int16_t); +CUDF_LTO_DECL(cast_to_int64, int64_t, int32_t); +CUDF_LTO_DECL(cast_to_int64, int64_t, int64_t); +CUDF_LTO_DECL(cast_to_int64, int64_t, uint8_t); +CUDF_LTO_DECL(cast_to_int64, int64_t, uint16_t); +CUDF_LTO_DECL(cast_to_int64, int64_t, uint32_t); +CUDF_LTO_DECL(cast_to_int64, int64_t, uint64_t); +CUDF_LTO_DECL(cast_to_int64, int64_t, float); +CUDF_LTO_DECL(cast_to_int64, int64_t, double); + +CUDF_LTO_DECL(cast_to_uint64, uint64_t, bool); +CUDF_LTO_DECL(cast_to_uint64, uint64_t, int8_t); +CUDF_LTO_DECL(cast_to_uint64, uint64_t, int16_t); +CUDF_LTO_DECL(cast_to_uint64, uint64_t, int32_t); +CUDF_LTO_DECL(cast_to_uint64, uint64_t, int64_t); +CUDF_LTO_DECL(cast_to_uint64, uint64_t, uint8_t); +CUDF_LTO_DECL(cast_to_uint64, uint64_t, uint16_t); +CUDF_LTO_DECL(cast_to_uint64, uint64_t, uint32_t); +CUDF_LTO_DECL(cast_to_uint64, uint64_t, uint64_t); +CUDF_LTO_DECL(cast_to_uint64, uint64_t, float); +CUDF_LTO_DECL(cast_to_uint64, uint64_t, double); + +CUDF_LTO_DECL(cast_to_float64, double, bool); +CUDF_LTO_DECL(cast_to_float64, double, int8_t); +CUDF_LTO_DECL(cast_to_float64, double, int16_t); +CUDF_LTO_DECL(cast_to_float64, double, int32_t); +CUDF_LTO_DECL(cast_to_float64, double, int64_t); +CUDF_LTO_DECL(cast_to_float64, double, uint8_t); +CUDF_LTO_DECL(cast_to_float64, double, uint16_t); +CUDF_LTO_DECL(cast_to_float64, double, uint32_t); +CUDF_LTO_DECL(cast_to_float64, double, uint64_t); +CUDF_LTO_DECL(cast_to_float64, double, float); +CUDF_LTO_DECL(cast_to_float64, double, double); + +#undef CUDF_LTO_DECL + +#define CUDF_LTO_DECL(op, type) \ + extern __device__ void op(bool* out, type const* a); \ + \ + extern __device__ void op(bool* out, optional const* a); \ + \ + extern __device__ void op(optional* out, optional const* a) + +CUDF_LTO_DECL(is_null, bool); +CUDF_LTO_DECL(is_null, int8_t); +CUDF_LTO_DECL(is_null, int16_t); +CUDF_LTO_DECL(is_null, int32_t); +CUDF_LTO_DECL(is_null, int64_t); +CUDF_LTO_DECL(is_null, uint8_t); +CUDF_LTO_DECL(is_null, uint16_t); +CUDF_LTO_DECL(is_null, uint32_t); +CUDF_LTO_DECL(is_null, uint64_t); +CUDF_LTO_DECL(is_null, float); +CUDF_LTO_DECL(is_null, double); +CUDF_LTO_DECL(is_null, decimal32); +CUDF_LTO_DECL(is_null, decimal64); +CUDF_LTO_DECL(is_null, decimal128); +CUDF_LTO_DECL(is_null, timestamp_D); +CUDF_LTO_DECL(is_null, timestamp_s); +CUDF_LTO_DECL(is_null, timestamp_ms); +CUDF_LTO_DECL(is_null, timestamp_us); +CUDF_LTO_DECL(is_null, timestamp_ns); +CUDF_LTO_DECL(is_null, duration_D); +CUDF_LTO_DECL(is_null, duration_s); +CUDF_LTO_DECL(is_null, duration_ms); +CUDF_LTO_DECL(is_null, duration_ns); +CUDF_LTO_DECL(is_null, string_view); + +CUDF_LTO_DECL(logical_not, bool); +CUDF_LTO_DECL(logical_not, int8_t); +CUDF_LTO_DECL(logical_not, int16_t); +CUDF_LTO_DECL(logical_not, int32_t); +CUDF_LTO_DECL(logical_not, int64_t); +CUDF_LTO_DECL(logical_not, uint8_t); +CUDF_LTO_DECL(logical_not, uint16_t); +CUDF_LTO_DECL(logical_not, uint32_t); +CUDF_LTO_DECL(logical_not, uint64_t); + +#undef CUDF_LTO_DECL + +} // namespace operators +} // namespace lto +} // namespace CUDF_EXPORT cudf diff --git a/cpp/src/transform/jit/lto_def.cu b/cpp/src/transform/jit/lto_def.cu new file mode 100644 index 00000000000..0fc4d152d2e --- /dev/null +++ b/cpp/src/transform/jit/lto_def.cu @@ -0,0 +1,571 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2019-2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +#include + +#include +#include + +namespace CUDF_EXPORT cudf { +namespace lto { + +template +using functor = ast::detail::operator_functor; + +template +using null_functor = ast::detail::operator_functor; + +using opcode = ast::ast_operator; + +// __device__ void column_device_view_core::head( ) const +// { +// *out = from_lto_ref(column).head(); +// } + +//[ ] from_lto_ref +#define CUDF_LTO_DEF(op, OP, type) \ + __device__ void operators::op(type* out, type const* a, type const* b) \ + { \ + auto ret = functor{}(*lto::lift(a), *lto::lift(b)); \ + *out = *lto::lower(&ret); \ + } \ + \ + __device__ void operators::op( \ + lto::optional* out, lto::optional const* a, lto::optional const* b) \ + { \ + auto ret = null_functor{}(*lto::lift(a), *lto::lift(b)); \ + *out = *lto::lower(&ret); \ + } + +CUDF_LTO_DEF(add, ADD, int32_t); +CUDF_LTO_DEF(add, ADD, int64_t); +CUDF_LTO_DEF(add, ADD, uint32_t); +CUDF_LTO_DEF(add, ADD, uint64_t); +CUDF_LTO_DEF(add, ADD, float); +CUDF_LTO_DEF(add, ADD, double); +CUDF_LTO_DEF(add, ADD, decimal32); +CUDF_LTO_DEF(add, ADD, decimal64); +CUDF_LTO_DEF(add, ADD, decimal128); +CUDF_LTO_DEF(add, ADD, duration_D); +CUDF_LTO_DEF(add, ADD, duration_s); +CUDF_LTO_DEF(add, ADD, duration_ms); +CUDF_LTO_DEF(add, ADD, duration_ns); + +CUDF_LTO_DEF(sub, SUB, int32_t); +CUDF_LTO_DEF(sub, SUB, int64_t); +CUDF_LTO_DEF(sub, SUB, uint32_t); +CUDF_LTO_DEF(sub, SUB, uint64_t); +CUDF_LTO_DEF(sub, SUB, float); +CUDF_LTO_DEF(sub, SUB, double); +CUDF_LTO_DEF(sub, SUB, decimal32); +CUDF_LTO_DEF(sub, SUB, decimal64); +CUDF_LTO_DEF(sub, SUB, decimal128); +CUDF_LTO_DEF(sub, SUB, duration_D); +CUDF_LTO_DEF(sub, SUB, duration_s); +CUDF_LTO_DEF(sub, SUB, duration_ms); +CUDF_LTO_DEF(sub, SUB, duration_ns); + +CUDF_LTO_DEF(mul, MUL, int32_t); +CUDF_LTO_DEF(mul, MUL, int64_t); +CUDF_LTO_DEF(mul, MUL, uint32_t); +CUDF_LTO_DEF(mul, MUL, uint64_t); +CUDF_LTO_DEF(mul, MUL, float); +CUDF_LTO_DEF(mul, MUL, double); +CUDF_LTO_DEF(mul, MUL, decimal32); +CUDF_LTO_DEF(mul, MUL, decimal64); +CUDF_LTO_DEF(mul, MUL, decimal128); + +CUDF_LTO_DEF(div, DIV, int32_t); +CUDF_LTO_DEF(div, DIV, int64_t); +CUDF_LTO_DEF(div, DIV, uint32_t); +CUDF_LTO_DEF(div, DIV, uint64_t); +CUDF_LTO_DEF(div, DIV, float); +CUDF_LTO_DEF(div, DIV, double); +CUDF_LTO_DEF(div, DIV, decimal32); +CUDF_LTO_DEF(div, DIV, decimal64); +CUDF_LTO_DEF(div, DIV, decimal128); + +CUDF_LTO_DEF(mod, MOD, float); +CUDF_LTO_DEF(mod, MOD, double); + +CUDF_LTO_DEF(pymod, PYMOD, float); +CUDF_LTO_DEF(pymod, PYMOD, double); + +CUDF_LTO_DEF(pow, POW, float); +CUDF_LTO_DEF(pow, POW, double); + +#undef CUDF_LTO_DEF + +#define CUDF_LTO_DEF(op, OP, type) \ + __device__ void operators::op(bool* out, type const* a, type const* b) \ + { \ + auto ret = functor{}(*lto::lift(a), *lto::lift(b)); \ + *out = *lto::lower(&ret); \ + } \ + \ + __device__ void operators::op(bool* out, optional const* a, optional const* b) \ + { \ + auto ret = null_functor{}(*lto::lift(a), *lto::lift(b)); \ + *out = **lto::lower(&ret); \ + } \ + \ + __device__ void operators::op( \ + optional* out, optional const* a, optional const* b) \ + { \ + auto ret = null_functor{}(*lto::lift(a), *lto::lift(b)); \ + *out = *lto::lower(&ret); \ + } + +CUDF_LTO_DEF(equal, EQUAL, bool); +CUDF_LTO_DEF(equal, EQUAL, int8_t); +CUDF_LTO_DEF(equal, EQUAL, int16_t); +CUDF_LTO_DEF(equal, EQUAL, int32_t); +CUDF_LTO_DEF(equal, EQUAL, int64_t); +CUDF_LTO_DEF(equal, EQUAL, uint8_t); +CUDF_LTO_DEF(equal, EQUAL, uint16_t); +CUDF_LTO_DEF(equal, EQUAL, uint32_t); +CUDF_LTO_DEF(equal, EQUAL, uint64_t); +CUDF_LTO_DEF(equal, EQUAL, float); +CUDF_LTO_DEF(equal, EQUAL, double); +CUDF_LTO_DEF(equal, EQUAL, decimal32); +CUDF_LTO_DEF(equal, EQUAL, decimal64); +CUDF_LTO_DEF(equal, EQUAL, decimal128); +CUDF_LTO_DEF(equal, EQUAL, timestamp_D); +CUDF_LTO_DEF(equal, EQUAL, timestamp_s); +CUDF_LTO_DEF(equal, EQUAL, timestamp_ms); +CUDF_LTO_DEF(equal, EQUAL, timestamp_us); +CUDF_LTO_DEF(equal, EQUAL, timestamp_ns); +CUDF_LTO_DEF(equal, EQUAL, duration_D); +CUDF_LTO_DEF(equal, EQUAL, duration_s); +CUDF_LTO_DEF(equal, EQUAL, duration_ms); +CUDF_LTO_DEF(equal, EQUAL, duration_ns); +CUDF_LTO_DEF(equal, EQUAL, string_view); + +CUDF_LTO_DEF(null_equal, NULL_EQUAL, bool); +CUDF_LTO_DEF(null_equal, NULL_EQUAL, int8_t); +CUDF_LTO_DEF(null_equal, NULL_EQUAL, int16_t); +CUDF_LTO_DEF(null_equal, NULL_EQUAL, int32_t); +CUDF_LTO_DEF(null_equal, NULL_EQUAL, int64_t); +CUDF_LTO_DEF(null_equal, NULL_EQUAL, uint8_t); +CUDF_LTO_DEF(null_equal, NULL_EQUAL, uint16_t); +CUDF_LTO_DEF(null_equal, NULL_EQUAL, uint32_t); +CUDF_LTO_DEF(null_equal, NULL_EQUAL, uint64_t); +CUDF_LTO_DEF(null_equal, NULL_EQUAL, float); +CUDF_LTO_DEF(null_equal, NULL_EQUAL, double); +CUDF_LTO_DEF(null_equal, NULL_EQUAL, decimal32); +CUDF_LTO_DEF(null_equal, NULL_EQUAL, decimal64); +CUDF_LTO_DEF(null_equal, NULL_EQUAL, decimal128); +CUDF_LTO_DEF(null_equal, NULL_EQUAL, timestamp_D); +CUDF_LTO_DEF(null_equal, NULL_EQUAL, timestamp_s); +CUDF_LTO_DEF(null_equal, NULL_EQUAL, timestamp_ms); +CUDF_LTO_DEF(null_equal, NULL_EQUAL, timestamp_us); +CUDF_LTO_DEF(null_equal, NULL_EQUAL, timestamp_ns); +CUDF_LTO_DEF(null_equal, NULL_EQUAL, duration_D); +CUDF_LTO_DEF(null_equal, NULL_EQUAL, duration_s); +CUDF_LTO_DEF(null_equal, NULL_EQUAL, duration_ms); +CUDF_LTO_DEF(null_equal, NULL_EQUAL, duration_ns); +CUDF_LTO_DEF(null_equal, NULL_EQUAL, string_view); + +#undef CUDF_LTO_DEF + +#define CUDF_LTO_DEF(op, OP, type) \ + __device__ void operators::op(bool* out, type const* a, type const* b) \ + { \ + auto ret = functor{}(*lto::lift(a), *lto::lift(b)); \ + *out = *lto::lower(&ret); \ + } \ + \ + __device__ void operators::op( \ + optional* out, optional const* a, optional const* b) \ + { \ + auto ret = null_functor{}(*lto::lift(a), *lto::lift(b)); \ + *out = *lto::lower(&ret); \ + } + +CUDF_LTO_DEF(less, LESS, bool); +CUDF_LTO_DEF(less, LESS, int8_t); +CUDF_LTO_DEF(less, LESS, int16_t); +CUDF_LTO_DEF(less, LESS, int32_t); +CUDF_LTO_DEF(less, LESS, int64_t); +CUDF_LTO_DEF(less, LESS, uint8_t); +CUDF_LTO_DEF(less, LESS, uint16_t); +CUDF_LTO_DEF(less, LESS, uint32_t); +CUDF_LTO_DEF(less, LESS, uint64_t); +CUDF_LTO_DEF(less, LESS, float); +CUDF_LTO_DEF(less, LESS, double); +CUDF_LTO_DEF(less, LESS, decimal32); +CUDF_LTO_DEF(less, LESS, decimal64); +CUDF_LTO_DEF(less, LESS, decimal128); +CUDF_LTO_DEF(less, LESS, timestamp_D); +CUDF_LTO_DEF(less, LESS, timestamp_s); +CUDF_LTO_DEF(less, LESS, timestamp_ms); +CUDF_LTO_DEF(less, LESS, timestamp_us); +CUDF_LTO_DEF(less, LESS, timestamp_ns); +CUDF_LTO_DEF(less, LESS, duration_D); +CUDF_LTO_DEF(less, LESS, duration_s); +CUDF_LTO_DEF(less, LESS, duration_ms); +CUDF_LTO_DEF(less, LESS, duration_ns); +CUDF_LTO_DEF(less, LESS, string_view); + +CUDF_LTO_DEF(greater, GREATER, bool); +CUDF_LTO_DEF(greater, GREATER, int8_t); +CUDF_LTO_DEF(greater, GREATER, int16_t); +CUDF_LTO_DEF(greater, GREATER, int32_t); +CUDF_LTO_DEF(greater, GREATER, int64_t); +CUDF_LTO_DEF(greater, GREATER, uint8_t); +CUDF_LTO_DEF(greater, GREATER, uint16_t); +CUDF_LTO_DEF(greater, GREATER, uint32_t); +CUDF_LTO_DEF(greater, GREATER, uint64_t); +CUDF_LTO_DEF(greater, GREATER, float); +CUDF_LTO_DEF(greater, GREATER, double); +CUDF_LTO_DEF(greater, GREATER, decimal32); +CUDF_LTO_DEF(greater, GREATER, decimal64); +CUDF_LTO_DEF(greater, GREATER, decimal128); +CUDF_LTO_DEF(greater, GREATER, timestamp_D); +CUDF_LTO_DEF(greater, GREATER, timestamp_s); +CUDF_LTO_DEF(greater, GREATER, timestamp_ms); +CUDF_LTO_DEF(greater, GREATER, timestamp_us); +CUDF_LTO_DEF(greater, GREATER, timestamp_ns); +CUDF_LTO_DEF(greater, GREATER, duration_D); +CUDF_LTO_DEF(greater, GREATER, duration_s); +CUDF_LTO_DEF(greater, GREATER, duration_ms); +CUDF_LTO_DEF(greater, GREATER, duration_ns); +CUDF_LTO_DEF(greater, GREATER, string_view); + +CUDF_LTO_DEF(less_equal, LESS_EQUAL, bool); +CUDF_LTO_DEF(less_equal, LESS_EQUAL, int8_t); +CUDF_LTO_DEF(less_equal, LESS_EQUAL, int16_t); +CUDF_LTO_DEF(less_equal, LESS_EQUAL, int32_t); +CUDF_LTO_DEF(less_equal, LESS_EQUAL, int64_t); +CUDF_LTO_DEF(less_equal, LESS_EQUAL, uint8_t); +CUDF_LTO_DEF(less_equal, LESS_EQUAL, uint16_t); +CUDF_LTO_DEF(less_equal, LESS_EQUAL, uint32_t); +CUDF_LTO_DEF(less_equal, LESS_EQUAL, uint64_t); +CUDF_LTO_DEF(less_equal, LESS_EQUAL, float); +CUDF_LTO_DEF(less_equal, LESS_EQUAL, double); +CUDF_LTO_DEF(less_equal, LESS_EQUAL, decimal32); +CUDF_LTO_DEF(less_equal, LESS_EQUAL, decimal64); +CUDF_LTO_DEF(less_equal, LESS_EQUAL, decimal128); +CUDF_LTO_DEF(less_equal, LESS_EQUAL, timestamp_D); +CUDF_LTO_DEF(less_equal, LESS_EQUAL, timestamp_s); +CUDF_LTO_DEF(less_equal, LESS_EQUAL, timestamp_ms); +CUDF_LTO_DEF(less_equal, LESS_EQUAL, timestamp_us); +CUDF_LTO_DEF(less_equal, LESS_EQUAL, timestamp_ns); +CUDF_LTO_DEF(less_equal, LESS_EQUAL, duration_D); +CUDF_LTO_DEF(less_equal, LESS_EQUAL, duration_s); +CUDF_LTO_DEF(less_equal, LESS_EQUAL, duration_ms); +CUDF_LTO_DEF(less_equal, LESS_EQUAL, duration_ns); +CUDF_LTO_DEF(less_equal, LESS_EQUAL, string_view); + +CUDF_LTO_DEF(greater_equal, GREATER_EQUAL, bool); +CUDF_LTO_DEF(greater_equal, GREATER_EQUAL, int8_t); +CUDF_LTO_DEF(greater_equal, GREATER_EQUAL, int16_t); +CUDF_LTO_DEF(greater_equal, GREATER_EQUAL, int32_t); +CUDF_LTO_DEF(greater_equal, GREATER_EQUAL, int64_t); +CUDF_LTO_DEF(greater_equal, GREATER_EQUAL, uint8_t); +CUDF_LTO_DEF(greater_equal, GREATER_EQUAL, uint16_t); +CUDF_LTO_DEF(greater_equal, GREATER_EQUAL, uint32_t); +CUDF_LTO_DEF(greater_equal, GREATER_EQUAL, uint64_t); +CUDF_LTO_DEF(greater_equal, GREATER_EQUAL, float); +CUDF_LTO_DEF(greater_equal, GREATER_EQUAL, double); +CUDF_LTO_DEF(greater_equal, GREATER_EQUAL, decimal32); +CUDF_LTO_DEF(greater_equal, GREATER_EQUAL, decimal64); +CUDF_LTO_DEF(greater_equal, GREATER_EQUAL, decimal128); +CUDF_LTO_DEF(greater_equal, GREATER_EQUAL, timestamp_D); +CUDF_LTO_DEF(greater_equal, GREATER_EQUAL, timestamp_s); +CUDF_LTO_DEF(greater_equal, GREATER_EQUAL, timestamp_ms); +CUDF_LTO_DEF(greater_equal, GREATER_EQUAL, timestamp_us); +CUDF_LTO_DEF(greater_equal, GREATER_EQUAL, timestamp_ns); +CUDF_LTO_DEF(greater_equal, GREATER_EQUAL, duration_D); +CUDF_LTO_DEF(greater_equal, GREATER_EQUAL, duration_s); +CUDF_LTO_DEF(greater_equal, GREATER_EQUAL, duration_ms); +CUDF_LTO_DEF(greater_equal, GREATER_EQUAL, duration_ns); +CUDF_LTO_DEF(greater_equal, GREATER_EQUAL, string_view); + +#undef CUDF_LTO_DEF + +#define CUDF_LTO_DEF(op, OP, type) \ + __device__ void operators::op(type* out, type const* a, type const* b) \ + { \ + auto ret = functor{}(*lto::lift(a), *lto::lift(b)); \ + *out = *lto::lower(&ret); \ + } \ + \ + __device__ void operators::op( \ + optional* out, optional const* a, optional const* b) \ + { \ + auto ret = null_functor{}(*lto::lift(a), *lto::lift(b)); \ + *out = *lto::lower(&ret); \ + } + +CUDF_LTO_DEF(bitwise_and, BITWISE_AND, int32_t); +CUDF_LTO_DEF(bitwise_and, BITWISE_AND, int64_t); +CUDF_LTO_DEF(bitwise_and, BITWISE_AND, uint32_t); +CUDF_LTO_DEF(bitwise_and, BITWISE_AND, uint64_t); + +CUDF_LTO_DEF(bitwise_or, BITWISE_OR, int32_t); +CUDF_LTO_DEF(bitwise_or, BITWISE_OR, int64_t); +CUDF_LTO_DEF(bitwise_or, BITWISE_OR, uint32_t); +CUDF_LTO_DEF(bitwise_or, BITWISE_OR, uint64_t); + +CUDF_LTO_DEF(bitwise_xor, BITWISE_XOR, int32_t); +CUDF_LTO_DEF(bitwise_xor, BITWISE_XOR, int64_t); +CUDF_LTO_DEF(bitwise_xor, BITWISE_XOR, uint32_t); +CUDF_LTO_DEF(bitwise_xor, BITWISE_XOR, uint64_t); + +#undef CUDF_LTO_DEF + +#define CUDF_LTO_DEF(op, OP, type) \ + __device__ void operators::op(type* out, type const* a, type const* b) \ + { \ + auto ret = functor{}(*lto::lift(a), *lto::lift(b)); \ + *out = *lto::lower(&ret); \ + } \ + \ + __device__ void operators::op( \ + optional* out, optional const* a, optional const* b) \ + { \ + auto ret = null_functor{}(*lto::lift(a), *lto::lift(b)); \ + *out = *lto::lower(&ret); \ + } + +CUDF_LTO_DEF(logical_and, LOGICAL_AND, bool); + +CUDF_LTO_DEF(null_logical_and, NULL_LOGICAL_AND, bool); + +CUDF_LTO_DEF(logical_or, LOGICAL_OR, bool); + +CUDF_LTO_DEF(null_logical_or, NULL_LOGICAL_OR, bool); + +#undef CUDF_LTO_DEF + +#define CUDF_LTO_DEF(op, OP, type) \ + __device__ void operators::op(type* out, type const* a) \ + { \ + auto ret = functor{}(*lto::lift(a)); \ + *out = *lto::lower(&ret); \ + } \ + \ + __device__ void operators::op(optional* out, optional const* a) \ + { \ + auto ret = null_functor{}(*lto::lift(a)); \ + *out = *lto::lower(&ret); \ + } + +CUDF_LTO_DEF(identity, IDENTITY, bool); +CUDF_LTO_DEF(identity, IDENTITY, int8_t); +CUDF_LTO_DEF(identity, IDENTITY, int16_t); +CUDF_LTO_DEF(identity, IDENTITY, int32_t); +CUDF_LTO_DEF(identity, IDENTITY, int64_t); +CUDF_LTO_DEF(identity, IDENTITY, uint8_t); +CUDF_LTO_DEF(identity, IDENTITY, uint16_t); +CUDF_LTO_DEF(identity, IDENTITY, uint32_t); +CUDF_LTO_DEF(identity, IDENTITY, uint64_t); +CUDF_LTO_DEF(identity, IDENTITY, float); +CUDF_LTO_DEF(identity, IDENTITY, double); +CUDF_LTO_DEF(identity, IDENTITY, decimal32); +CUDF_LTO_DEF(identity, IDENTITY, decimal64); +CUDF_LTO_DEF(identity, IDENTITY, decimal128); +CUDF_LTO_DEF(identity, IDENTITY, timestamp_D); +CUDF_LTO_DEF(identity, IDENTITY, timestamp_s); +CUDF_LTO_DEF(identity, IDENTITY, timestamp_ms); +CUDF_LTO_DEF(identity, IDENTITY, timestamp_us); +CUDF_LTO_DEF(identity, IDENTITY, timestamp_ns); +CUDF_LTO_DEF(identity, IDENTITY, duration_D); +CUDF_LTO_DEF(identity, IDENTITY, duration_s); +CUDF_LTO_DEF(identity, IDENTITY, duration_ms); +CUDF_LTO_DEF(identity, IDENTITY, duration_ns); +CUDF_LTO_DEF(identity, IDENTITY, string_view); + +CUDF_LTO_DEF(sin, SIN, float); +CUDF_LTO_DEF(sin, SIN, double); + +CUDF_LTO_DEF(cos, COS, float); +CUDF_LTO_DEF(cos, COS, double); + +CUDF_LTO_DEF(tan, TAN, float); +CUDF_LTO_DEF(tan, TAN, double); + +CUDF_LTO_DEF(arcsin, ARCSIN, float); +CUDF_LTO_DEF(arcsin, ARCSIN, double); + +CUDF_LTO_DEF(arccos, ARCCOS, float); +CUDF_LTO_DEF(arccos, ARCCOS, double); + +CUDF_LTO_DEF(arctan, ARCTAN, float); +CUDF_LTO_DEF(arctan, ARCTAN, double); + +CUDF_LTO_DEF(sinh, SINH, float); +CUDF_LTO_DEF(sinh, SINH, double); + +CUDF_LTO_DEF(cosh, COSH, float); +CUDF_LTO_DEF(cosh, COSH, double); + +CUDF_LTO_DEF(tanh, TANH, float); +CUDF_LTO_DEF(tanh, TANH, double); + +CUDF_LTO_DEF(arcsinh, ARCSINH, float); +CUDF_LTO_DEF(arcsinh, ARCSINH, double); + +CUDF_LTO_DEF(arccosh, ARCCOSH, float); +CUDF_LTO_DEF(arccosh, ARCCOSH, double); + +CUDF_LTO_DEF(arctanh, ARCTANH, float); +CUDF_LTO_DEF(arctanh, ARCTANH, double); + +CUDF_LTO_DEF(exp, EXP, float); +CUDF_LTO_DEF(exp, EXP, double); + +CUDF_LTO_DEF(log, LOG, float); +CUDF_LTO_DEF(log, LOG, double); + +CUDF_LTO_DEF(cbrt, CBRT, float); +CUDF_LTO_DEF(cbrt, CBRT, double); + +CUDF_LTO_DEF(ceil, CEIL, float); +CUDF_LTO_DEF(ceil, CEIL, double); + +CUDF_LTO_DEF(floor, FLOOR, float); +CUDF_LTO_DEF(floor, FLOOR, double); + +CUDF_LTO_DEF(abs, ABS, int32_t); +CUDF_LTO_DEF(abs, ABS, int64_t); +CUDF_LTO_DEF(abs, ABS, float); +CUDF_LTO_DEF(abs, ABS, double); + +CUDF_LTO_DEF(rint, RINT, float); +CUDF_LTO_DEF(rint, RINT, double); + +CUDF_LTO_DEF(bit_invert, BIT_INVERT, uint32_t); +CUDF_LTO_DEF(bit_invert, BIT_INVERT, uint64_t); +CUDF_LTO_DEF(bit_invert, BIT_INVERT, int32_t); +CUDF_LTO_DEF(bit_invert, BIT_INVERT, int64_t); + +#undef CUDF_LTO_DEF + +#define CUDF_LTO_DEF(op, OP, ret_type, type) \ + extern __device__ void operators::op(ret_type* out, type const* a) \ + { \ + auto ret = functor{}(*lto::lift(a)); \ + *out = *lto::lower(&ret); \ + } \ + \ + extern __device__ void operators::op(optional* out, optional const* a) \ + { \ + auto ret = null_functor{}(*lto::lift(a)); \ + *out = *lto::lower(&ret); \ + } + +CUDF_LTO_DEF(cast_to_int64, CAST_TO_INT64, int64_t, bool); +CUDF_LTO_DEF(cast_to_int64, CAST_TO_INT64, int64_t, int8_t); +CUDF_LTO_DEF(cast_to_int64, CAST_TO_INT64, int64_t, int16_t); +CUDF_LTO_DEF(cast_to_int64, CAST_TO_INT64, int64_t, int32_t); +CUDF_LTO_DEF(cast_to_int64, CAST_TO_INT64, int64_t, int64_t); +CUDF_LTO_DEF(cast_to_int64, CAST_TO_INT64, int64_t, uint8_t); +CUDF_LTO_DEF(cast_to_int64, CAST_TO_INT64, int64_t, uint16_t); +CUDF_LTO_DEF(cast_to_int64, CAST_TO_INT64, int64_t, uint32_t); +CUDF_LTO_DEF(cast_to_int64, CAST_TO_INT64, int64_t, uint64_t); +CUDF_LTO_DEF(cast_to_int64, CAST_TO_INT64, int64_t, float); +CUDF_LTO_DEF(cast_to_int64, CAST_TO_INT64, int64_t, double); + +CUDF_LTO_DEF(cast_to_uint64, CAST_TO_UINT64, uint64_t, bool); +CUDF_LTO_DEF(cast_to_uint64, CAST_TO_UINT64, uint64_t, int8_t); +CUDF_LTO_DEF(cast_to_uint64, CAST_TO_UINT64, uint64_t, int16_t); +CUDF_LTO_DEF(cast_to_uint64, CAST_TO_UINT64, uint64_t, int32_t); +CUDF_LTO_DEF(cast_to_uint64, CAST_TO_UINT64, uint64_t, int64_t); +CUDF_LTO_DEF(cast_to_uint64, CAST_TO_UINT64, uint64_t, uint8_t); +CUDF_LTO_DEF(cast_to_uint64, CAST_TO_UINT64, uint64_t, uint16_t); +CUDF_LTO_DEF(cast_to_uint64, CAST_TO_UINT64, uint64_t, uint32_t); +CUDF_LTO_DEF(cast_to_uint64, CAST_TO_UINT64, uint64_t, uint64_t); +CUDF_LTO_DEF(cast_to_uint64, CAST_TO_UINT64, uint64_t, float); +CUDF_LTO_DEF(cast_to_uint64, CAST_TO_UINT64, uint64_t, double); + +CUDF_LTO_DEF(cast_to_float64, CAST_TO_FLOAT64, double, bool); +CUDF_LTO_DEF(cast_to_float64, CAST_TO_FLOAT64, double, int8_t); +CUDF_LTO_DEF(cast_to_float64, CAST_TO_FLOAT64, double, int16_t); +CUDF_LTO_DEF(cast_to_float64, CAST_TO_FLOAT64, double, int32_t); +CUDF_LTO_DEF(cast_to_float64, CAST_TO_FLOAT64, double, int64_t); +CUDF_LTO_DEF(cast_to_float64, CAST_TO_FLOAT64, double, uint8_t); +CUDF_LTO_DEF(cast_to_float64, CAST_TO_FLOAT64, double, uint16_t); +CUDF_LTO_DEF(cast_to_float64, CAST_TO_FLOAT64, double, uint32_t); +CUDF_LTO_DEF(cast_to_float64, CAST_TO_FLOAT64, double, uint64_t); +CUDF_LTO_DEF(cast_to_float64, CAST_TO_FLOAT64, double, float); +CUDF_LTO_DEF(cast_to_float64, CAST_TO_FLOAT64, double, double); + +#undef CUDF_LTO_DEF + +#define CUDF_LTO_DEF(op, OP, type) \ + __device__ void operators::op(bool* out, type const* a) \ + { \ + auto ret = functor{}(*lto::lift(a)); \ + *out = *lto::lower(&ret); \ + } \ + \ + __device__ void operators::op(bool* out, optional const* a) \ + { \ + auto ret = null_functor{}(*lto::lift(a)); \ + *out = *lto::lower(&ret); \ + } \ + \ + __device__ void operators::op(optional* out, optional const* a) \ + { \ + auto ret = null_functor{}(*lto::lift(a)); \ + *out = *lto::lower(&ret); \ + } + +CUDF_LTO_DEF(is_null, IS_NULL, bool); +CUDF_LTO_DEF(is_null, IS_NULL, int8_t); +CUDF_LTO_DEF(is_null, IS_NULL, int16_t); +CUDF_LTO_DEF(is_null, IS_NULL, int32_t); +CUDF_LTO_DEF(is_null, IS_NULL, int64_t); +CUDF_LTO_DEF(is_null, IS_NULL, uint8_t); +CUDF_LTO_DEF(is_null, IS_NULL, uint16_t); +CUDF_LTO_DEF(is_null, IS_NULL, uint32_t); +CUDF_LTO_DEF(is_null, IS_NULL, uint64_t); +CUDF_LTO_DEF(is_null, IS_NULL, float); +CUDF_LTO_DEF(is_null, IS_NULL, double); +CUDF_LTO_DEF(is_null, IS_NULL, decimal32); +CUDF_LTO_DEF(is_null, IS_NULL, decimal64); +CUDF_LTO_DEF(is_null, IS_NULL, decimal128); +CUDF_LTO_DEF(is_null, IS_NULL, timestamp_D); +CUDF_LTO_DEF(is_null, IS_NULL, timestamp_s); +CUDF_LTO_DEF(is_null, IS_NULL, timestamp_ms); +CUDF_LTO_DEF(is_null, IS_NULL, timestamp_us); +CUDF_LTO_DEF(is_null, IS_NULL, timestamp_ns); +CUDF_LTO_DEF(is_null, IS_NULL, duration_D); +CUDF_LTO_DEF(is_null, IS_NULL, duration_s); +CUDF_LTO_DEF(is_null, IS_NULL, duration_ms); +CUDF_LTO_DEF(is_null, IS_NULL, duration_ns); +CUDF_LTO_DEF(is_null, IS_NULL, string_view); + +#undef CUDF_LTO_DEF + +#define CUDF_LTO_DEF(op, OP, type) \ + __device__ void operators::op(bool* out, type const* a) \ + { \ + auto ret = functor{}(*lto::lift(a)); \ + *out = *lto::lower(&ret); \ + } \ + \ + __device__ void operators::op(optional* out, optional const* a) \ + { \ + auto ret = null_functor{}(*lto::lift(a)); \ + *out = *lto::lower(&ret); \ + } + +CUDF_LTO_DEF(logical_not, NOT, bool); +CUDF_LTO_DEF(logical_not, NOT, int8_t); +CUDF_LTO_DEF(logical_not, NOT, int16_t); +CUDF_LTO_DEF(logical_not, NOT, int32_t); +CUDF_LTO_DEF(logical_not, NOT, int64_t); +CUDF_LTO_DEF(logical_not, NOT, uint8_t); +CUDF_LTO_DEF(logical_not, NOT, uint16_t); +CUDF_LTO_DEF(logical_not, NOT, uint32_t); +CUDF_LTO_DEF(logical_not, NOT, uint64_t); + +#undef CUDF_LTO_DEF + +} // namespace lto +} // namespace CUDF_EXPORT cudf diff --git a/cpp/src/transform/jit/lto_kernel.cu b/cpp/src/transform/jit/lto_kernel.cu new file mode 100644 index 00000000000..5e0d9254bc6 --- /dev/null +++ b/cpp/src/transform/jit/lto_kernel.cu @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include + +extern "C" { + +__device__ void transform_operator(cudf::lto::transform_params const*); + +__global__ void transform_kernel(void const* outputs, + void const* span_outputs, + void const* inputs, + void* user_data, + cudf::size_type num_rows) +{ + auto const start = cudf::detail::grid_1d::global_thread_id(); + auto const stride = cudf::detail::grid_1d::grid_stride(); + auto const size = num_rows; + + for (auto i = start; i < size; i += stride) { + auto row_index = i; + + cudf::lto::transform_params p{.inputs = inputs, + .user_data = user_data, + .outputs = outputs, + .span_outputs = span_outputs, + .row_index = static_cast(i)}; + transform_operator(&p); + } +} +} diff --git a/cpp/src/transform/jit/lto_thunk.cuh b/cpp/src/transform/jit/lto_thunk.cuh new file mode 100644 index 00000000000..7e8f7e66758 --- /dev/null +++ b/cpp/src/transform/jit/lto_thunk.cuh @@ -0,0 +1,76 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ +#pragma once +#include +#include +#include +#include +#include +#include + +#include + +#include + +namespace CUDF_EXPORT cudf { + +namespace lto { + +#define CUDF_LTO_MAP(lowered_type, lifted_type) \ + static_assert(sizeof(lowered_type) == sizeof(lifted_type), \ + "(1: size). Lowered and Lifted types must be equivalent"); \ + static_assert(alignof(lowered_type) == alignof(lifted_type), \ + "(2: alignment). Lowered and Lifted types must be equivalent"); \ + static_assert(sizeof(lto::optional) == sizeof(cuda::std::optional), \ + "(1: size). Lowered and Lifted types must have equivalent optional types"); \ + static_assert(alignof(lto::optional) == alignof(cuda::std::optional), \ + "(2: alignment). Lowered and Lifted types must have equivalent optional types"); \ + __device__ lowered_type* lower(lifted_type*); \ + __device__ lowered_type const* lower(lifted_type const*); \ + __device__ lto::optional* lower(cuda::std::optional*); \ + __device__ lto::optional const* lower(cuda::std::optional const*); \ + __device__ lifted_type* lift(lowered_type*); \ + __device__ lifted_type const* lift(lowered_type const*); \ + __device__ cuda::std::optional* lift(lto::optional*); \ + __device__ cuda::std::optional const* lift(lto::optional const*) + +CUDF_LTO_MAP(bool, bool); +CUDF_LTO_MAP(data_type, cudf::data_type); +CUDF_LTO_MAP(int8_t, int8_t); +CUDF_LTO_MAP(int16_t, int16_t); +CUDF_LTO_MAP(int32_t, int32_t); +CUDF_LTO_MAP(int64_t, int64_t); +CUDF_LTO_MAP(uint8_t, uint8_t); +CUDF_LTO_MAP(uint16_t, uint16_t); +CUDF_LTO_MAP(uint32_t, uint32_t); +CUDF_LTO_MAP(uint64_t, uint64_t); +CUDF_LTO_MAP(float, float); +CUDF_LTO_MAP(double, double); +CUDF_LTO_MAP(lto::decimal32, numeric::decimal32); +CUDF_LTO_MAP(lto::decimal64, numeric::decimal64); +CUDF_LTO_MAP(lto::decimal128, numeric::decimal128); +CUDF_LTO_MAP(lto::string_view, cudf::string_view); +CUDF_LTO_MAP(lto::timestamp_D, cudf::timestamp_D); +CUDF_LTO_MAP(lto::timestamp_h, cudf::timestamp_h); +CUDF_LTO_MAP(lto::timestamp_m, cudf::timestamp_m); +CUDF_LTO_MAP(lto::timestamp_s, cudf::timestamp_s); +CUDF_LTO_MAP(lto::timestamp_ms, cudf::timestamp_ms); +CUDF_LTO_MAP(lto::timestamp_us, cudf::timestamp_us); +CUDF_LTO_MAP(lto::timestamp_ns, cudf::timestamp_ns); +CUDF_LTO_MAP(lto::duration_D, cudf::duration_D); +CUDF_LTO_MAP(lto::duration_h, cudf::duration_h); +CUDF_LTO_MAP(lto::duration_m, cudf::duration_m); +CUDF_LTO_MAP(lto::duration_s, cudf::duration_s); +CUDF_LTO_MAP(lto::duration_ms, cudf::duration_ms); +CUDF_LTO_MAP(lto::duration_us, cudf::duration_us); +CUDF_LTO_MAP(lto::duration_ns, cudf::duration_ns); +CUDF_LTO_MAP(lto::column_device_view_core, cudf::column_device_view_core); +CUDF_LTO_MAP(lto::mutable_column_device_view_core, cudf::mutable_column_device_view_core); + +#undef CUDF_LTO_MAP + +} // namespace lto + +} // namespace CUDF_EXPORT cudf diff --git a/cpp/src/transform/jit/transform_params.cuh b/cpp/src/transform/jit/transform_params.cuh new file mode 100644 index 00000000000..e1da4baf14f --- /dev/null +++ b/cpp/src/transform/jit/transform_params.cuh @@ -0,0 +1,40 @@ + +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ +#pragma once +#include + +#include + +namespace CUDF_EXPORT cudf { + +namespace lto { + +struct transform_params { + void const* inputs = nullptr; + void* user_data = nullptr; + void const* outputs = nullptr; + void const* span_outputs = nullptr; + size_type row_index = 0; +}; + +} // namespace lto +} // namespace CUDF_EXPORT cudf diff --git a/lto-ir-notes.txt b/lto-ir-notes.txt new file mode 100644 index 00000000000..f5ab9944f87 --- /dev/null +++ b/lto-ir-notes.txt @@ -0,0 +1 @@ +Not compatible across major versions