Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 9 additions & 4 deletions .github/workflows/asan_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
# specific language governing permissions and limitations
# under the License.

name: Address Sanitizer Tests
name: Address/Undefined Sanitizer Tests

on:
pull_request:
Expand Down Expand Up @@ -47,18 +47,23 @@ jobs:
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Configure and Build with ASAN
- name: Install dependencies
run: |
sudo apt-get update
sudo apt-get install -y build-essential cmake libpthread-stubs0-dev
- name: Configure and Build with ASAN and UBSAN
env:
CC: ${{ matrix.cc }}
CXX: ${{ matrix.cxx }}
run: |
mkdir build && cd build
cmake .. -DCMAKE_BUILD_TYPE=Debug -DENABLE_ASAN=ON -DBUILD_JAVA=OFF
mkdir -p build && cd build
cmake .. -DCMAKE_BUILD_TYPE=Debug -DENABLE_ASAN=ON -DENABLE_UBSAN=ON -DBUILD_ENABLE_AVX512=ON -DBUILD_CPP_ENABLE_METRICS=ON -DBUILD_JAVA=OFF
make
- name: Run Tests
working-directory: build
env:
ASAN_OPTIONS: detect_leaks=1:symbolize=1:strict_string_checks=1:halt_on_error=0:detect_container_overflow=0
LSAN_OPTIONS: suppressions=${{ github.workspace }}/.github/lsan-suppressions.txt
UBSAN_OPTIONS: print_stacktrace=1
run: |
ctest --output-on-failure
15 changes: 15 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,10 @@ option(ORC_ENABLE_CLANG_TOOLS
"Enable Clang tools"
ON)

option(ENABLE_UBSAN
"Enable Undefined Behavior Sanitizer"
OFF)

# Make sure that a build type is selected
if (NOT CMAKE_BUILD_TYPE)
message(STATUS "No build type selected, default to ReleaseWithDebugInfo")
Expand Down Expand Up @@ -171,6 +175,17 @@ if (ENABLE_ASAN)
endif()
endif()

# Configure Undefined Behavior Sanitizer if enabled
if (ENABLE_UBSAN)
if (CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=undefined -fno-sanitize=alignment,vptr,function -fno-sanitize-recover=all")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=undefined -fno-sanitize=alignment,vptr,function -fno-sanitize-recover=all")
message(STATUS "Undefined Behavior Sanitizer enabled")
else()
message(WARNING "Undefined Behavior Sanitizer is only supported for GCC and Clang compilers")
endif()
endif()

enable_testing()

INCLUDE(GNUInstallDirs) # Put it before ThirdpartyToolchain to make CMAKE_INSTALL_LIBDIR available.
Expand Down
34 changes: 2 additions & 32 deletions c++/include/orc/Int128.hh
Original file line number Diff line number Diff line change
Expand Up @@ -193,43 +193,13 @@ namespace orc {
* Shift left by the given number of bits.
* Values larger than 2**127 will shift into the sign bit.
*/
Int128& operator<<=(uint32_t bits) {
if (bits != 0) {
if (bits < 64) {
highbits_ <<= bits;
highbits_ |= (lowbits_ >> (64 - bits));
lowbits_ <<= bits;
} else if (bits < 128) {
highbits_ = static_cast<int64_t>(lowbits_) << (bits - 64);
lowbits_ = 0;
} else {
highbits_ = 0;
lowbits_ = 0;
}
}
return *this;
}
Int128& operator<<=(uint32_t bits);

/**
* Shift right by the given number of bits. Negative values will
* sign extend and fill with one bits.
*/
Int128& operator>>=(uint32_t bits) {
if (bits != 0) {
if (bits < 64) {
lowbits_ >>= bits;
lowbits_ |= static_cast<uint64_t>(highbits_ << (64 - bits));
highbits_ = static_cast<int64_t>(static_cast<uint64_t>(highbits_) >> bits);
} else if (bits < 128) {
lowbits_ = static_cast<uint64_t>(highbits_ >> (bits - 64));
highbits_ = highbits_ >= 0 ? 0 : -1l;
} else {
highbits_ = highbits_ >= 0 ? 0 : -1l;
lowbits_ = static_cast<uint64_t>(highbits_);
}
}
return *this;
}
Int128& operator>>=(uint32_t bits);

bool operator==(const Int128& right) const {
return highbits_ == right.highbits_ && lowbits_ == right.lowbits_;
Expand Down
6 changes: 6 additions & 0 deletions c++/src/Adaptor.hh.in
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,12 @@ typedef SSIZE_T ssize_t;
ssize_t pread(int fd, void* buf, size_t count, off_t offset);
#endif

#if defined(__GNUC__) || defined(__clang__)
#define NO_SANITIZE_ATTR __attribute__((no_sanitize("signed-integer-overflow", "shift")))

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it possible to fix the code where signed-integer-overflow and shift issues were discovered?
Is it temporary?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's a temporary workaround. I intend to split the necessary changes into multiple patches to fix the issue discovered by UBSAN.

#else
#define NO_SANITIZE_ATTR
#endif

#ifdef HAS_DIAGNOSTIC_PUSH
#ifdef __clang__
#define DIAGNOSTIC_PUSH _Pragma("clang diagnostic push")
Expand Down
3 changes: 2 additions & 1 deletion c++/src/BloomFilter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,7 @@ namespace orc {
}

DIAGNOSTIC_POP

NO_SANITIZE_ATTR
void BloomFilterImpl::addHash(int64_t hash64) {
int32_t hash1 = static_cast<int32_t>(hash64 & 0xffffffff);
// In Java codes, we use "hash64 >>> 32" which is an unsigned shift op.
Expand All @@ -226,6 +226,7 @@ namespace orc {
}
}

NO_SANITIZE_ATTR
bool BloomFilterImpl::testHash(int64_t hash64) const {
int32_t hash1 = static_cast<int32_t>(hash64 & 0xffffffff);
// In Java codes, we use "hash64 >>> 32" which is an unsigned shift op.
Expand Down
1 change: 1 addition & 0 deletions c++/src/BloomFilter.hh
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,7 @@ namespace orc {
// Thomas Wang's integer hash function
// http://web.archive.org/web/20071223173210/http://www.concentric.net/~Ttwang/tech/inthash.htm
// Put this in header file so tests can use it as well.
NO_SANITIZE_ATTR
inline int64_t getLongHash(int64_t key) {
key = (~key) + (key << 21); // key = (key << 21) - key - 1;
key = key ^ (key >> 24);
Expand Down
7 changes: 6 additions & 1 deletion c++/src/ColumnReader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -726,6 +726,9 @@ namespace orc {
if (totalBytes <= lastBufferLength_) {
// subtract the needed bytes from the ones left over
lastBufferLength_ -= totalBytes;
if (lastBuffer_ == nullptr) {
throw ParseError("StringDirectColumnReader::skip: lastBuffer_ is null");
}
lastBuffer_ += totalBytes;
} else {
// move the stream forward after accounting for the buffered bytes
Expand Down Expand Up @@ -780,7 +783,9 @@ namespace orc {
byteBatch.blob.resize(totalLength);
char* ptr = byteBatch.blob.data();
while (bytesBuffered + lastBufferLength_ < totalLength) {
memcpy(ptr + bytesBuffered, lastBuffer_, lastBufferLength_);
if (lastBuffer_ != nullptr) {
memcpy(ptr + bytesBuffered, lastBuffer_, lastBufferLength_);
}
bytesBuffered += lastBufferLength_;
const void* readBuffer;
int readLength;
Expand Down
4 changes: 2 additions & 2 deletions c++/src/ConvertColumnReader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -126,13 +126,13 @@ namespace orc {
bool shouldThrow) {
constexpr bool isFileTypeFloatingPoint(std::is_floating_point<FileType>::value);
constexpr bool isReadTypeFloatingPoint(std::is_floating_point<ReadType>::value);
int64_t longValue = static_cast<int64_t>(srcValue);

if (isFileTypeFloatingPoint) {
if (isReadTypeFloatingPoint) {
destValue = static_cast<ReadType>(srcValue);
} else {
if (!canFitInLong(static_cast<double>(srcValue)) ||
!downCastToInteger(destValue, longValue)) {
!downCastToInteger(destValue, static_cast<int64_t>(srcValue))) {
handleOverflow<FileType, ReadType>(destBatch, idx, shouldThrow);
}
}
Expand Down
35 changes: 35 additions & 0 deletions c++/src/Int128.cc
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,41 @@
#include <sstream>

namespace orc {
NO_SANITIZE_ATTR
Int128& Int128::operator<<=(uint32_t bits) {
if (bits != 0) {
if (bits < 64) {
highbits_ <<= bits;
highbits_ |= (lowbits_ >> (64 - bits));
lowbits_ <<= bits;
} else if (bits < 128) {
highbits_ = static_cast<int64_t>(lowbits_) << (bits - 64);
lowbits_ = 0;
} else {
highbits_ = 0;
lowbits_ = 0;
}
}
return *this;
}

NO_SANITIZE_ATTR
Int128& Int128::operator>>=(uint32_t bits) {
if (bits != 0) {
if (bits < 64) {
lowbits_ >>= bits;
lowbits_ |= static_cast<uint64_t>(highbits_ << (64 - bits));
highbits_ = static_cast<int64_t>(static_cast<uint64_t>(highbits_) >> bits);
} else if (bits < 128) {
lowbits_ = static_cast<uint64_t>(highbits_ >> (bits - 64));
highbits_ = highbits_ >= 0 ? 0 : -1l;
} else {
highbits_ = highbits_ >= 0 ? 0 : -1l;
lowbits_ = static_cast<uint64_t>(highbits_);
}
}
return *this;
}

Int128 Int128::maximumValue() {
return Int128(0x7fffffffffffffff, 0xffffffffffffffff);
Expand Down
1 change: 1 addition & 0 deletions c++/src/RLE.cc
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ namespace orc {
add<int16_t>(data, numValues, notNull);
}

NO_SANITIZE_ATTR
void RleEncoder::writeVslong(int64_t val) {
writeVulong((val << 1) ^ (val >> 63));
}
Expand Down
1 change: 1 addition & 0 deletions c++/src/RLE.hh
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@

namespace orc {

NO_SANITIZE_ATTR
inline int64_t zigZag(int64_t value) {
return (value << 1) ^ (value >> 63);
}
Expand Down
Loading