Skip to content
38 changes: 38 additions & 0 deletions compiler-rt/cmake/Modules/CheckAssemblerFlag.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Helper function to find out whether the assembler supports a particular
# command-line flag. You'd like to use the standard check_compiler_flag(), but
# that only supports a fixed list of languages, and ASM isn't one of them. So
# we do it ourselves, by trying to assemble an empty source file.
#
# Usage:
#   check_assembler_flag(<outvar> <flag>)
#
#   <outvar>  Name of the INTERNAL cache variable that receives the result:
#             1 if the test assembly succeeded with <flag>, or the empty
#             string if it did not. If <outvar> is already defined, the check
#             is skipped and the existing value is left alone.
#   <flag>    The command-line flag to test.
#
# Progress messages are suppressed when CMAKE_REQUIRED_QUIET is set; the
# result is stored in the cache either way.
function(check_assembler_flag outvar flag)
  if(NOT DEFINED "${outvar}")
    if(NOT CMAKE_REQUIRED_QUIET)
      message(CHECK_START "Checking for assembler flag ${flag}")
    endif()

    # Stop try_compile from attempting to link the result of the assembly, so
    # that we don't depend on having a working linker, and also don't have to
    # figure out what special symbol like _start needs to be defined in the
    # test input.
    #
    # This change is made within the dynamic scope of this function, so
    # CMAKE_TRY_COMPILE_TARGET_TYPE will be restored to its previous value on
    # return.
    set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY)

    # Try to assemble an empty source file named CheckAssemblerFlag.s, using
    # the provided flag. NO_CACHE keeps the temporary 'success' result local
    # to this function.
    try_compile(success
      SOURCE_FROM_CONTENT "CheckAssemblerFlag.s" ""
      COMPILE_DEFINITIONS ${flag}
      NO_CACHE)

    # Record the outcome unconditionally. CMAKE_REQUIRED_QUIET must only
    # silence the progress messages below; if the cache entry were skipped in
    # quiet mode, callers would always see the flag as unsupported and the
    # check would be repeated on every configure run.
    if(success)
      set(${outvar} 1 CACHE INTERNAL "Test assembler flag ${flag}")
    else()
      set(${outvar} "" CACHE INTERNAL "Test assembler flag ${flag}")
    endif()

    if(NOT CMAKE_REQUIRED_QUIET)
      if(success)
        message(CHECK_PASS "Accepted")
      else()
        message(CHECK_FAIL "Not accepted")
      endif()
    endif()
  endif()
endfunction()
45 changes: 45 additions & 0 deletions compiler-rt/lib/builtins/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ endif()
include(builtin-config-ix)
include(CMakeDependentOption)
include(CMakePushCheckState)
include(CheckAssemblerFlag)

# Boolean cache option, ON by default. Per its description, it requests that
# no symbols be exported from the static library.
option(COMPILER_RT_BUILTINS_HIDE_SYMBOLS
"Do not export any symbols from the static library." ON)
Expand Down Expand Up @@ -422,6 +423,40 @@ set(arm_or_thumb2_base_SOURCES
${GENERIC_SOURCES}
)

# User-facing switch (ON by default) for the hand-written 32-bit Arm
# floating-point routines.
option(COMPILER_RT_ARM_OPTIMIZED_FP
  "On 32-bit Arm, use optimized assembly implementations of FP arithmetic. Likely to increase code size, but be faster." ON)

if(COMPILER_RT_ARM_OPTIMIZED_FP AND BUILTIN_SUPPORTED_ARCH MATCHES "arm")
  # The assembly sources below are built with -mimplicit-it=always. Work out
  # which spelling of that flag the assembler accepts: first the bare form,
  # then the -Wa, pass-through form. An empty implicit_it_flag means neither
  # spelling was accepted.
  set(implicit_it_flag "")
  check_assembler_flag(COMPILER_RT_HAS_MIMPLICIT_IT -mimplicit-it=always)
  if(COMPILER_RT_HAS_MIMPLICIT_IT)
    set(implicit_it_flag -mimplicit-it=always)
  else()
    check_assembler_flag(
      COMPILER_RT_HAS_WA_MIMPLICIT_IT -Wa,-mimplicit-it=always)
    if(COMPILER_RT_HAS_WA_MIMPLICIT_IT)
      set(implicit_it_flag -Wa,-mimplicit-it=always)
    else()
      message(WARNING "Don't know how to set the -mimplicit-it=always flag in this assembler; not including Arm optimized implementations")
    endif()
  endif()

  # Only add the optimized routines when a working flag spelling was found.
  if(implicit_it_flag)
    set(assembly_files
      arm/mulsf3.S
      arm/divsf3.S)
    # Attach the flag to just these assembly files.
    set_source_files_properties(${assembly_files}
      PROPERTIES COMPILE_OPTIONS "${implicit_it_flag}")
    # Put the optimized routines and their C helper files at the front of the
    # existing Arm/Thumb-2 source list.
    set(arm_or_thumb2_base_SOURCES
      ${assembly_files}
      arm/fnan2.c
      arm/fnorm2.c
      arm/funder.c
      ${arm_or_thumb2_base_SOURCES}
    )
  endif()
endif()

set(arm_sync_SOURCES
arm/sync_fetch_and_add_4.S
arm/sync_fetch_and_add_8.S
Expand Down Expand Up @@ -455,6 +490,16 @@ set(thumb1_base_SOURCES
${GENERIC_SOURCES}
)

# When the optimized FP option is enabled, put the Thumb-1 assembly multiply
# routine and the shared C helper files at the front of the Thumb-1 source
# list.
if(COMPILER_RT_ARM_OPTIMIZED_FP)
  set(thumb1_optimized_fp_files
    arm/thumb1/mulsf3.S
    arm/fnan2.c
    arm/fnorm2.c
    arm/funder.c)
  set(thumb1_base_SOURCES
    ${thumb1_optimized_fp_files}
    ${thumb1_base_SOURCES})
endif()

set(arm_EABI_RT_SOURCES
arm/aeabi_cdcmp.S
arm/aeabi_cdcmpeq_check_nan.c
Expand Down
608 changes: 608 additions & 0 deletions compiler-rt/lib/builtins/arm/divsf3.S

Large diffs are not rendered by default.

42 changes: 42 additions & 0 deletions compiler-rt/lib/builtins/arm/fnan2.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
//===-- fnan2.c - Handle single-precision NaN inputs to binary operation --===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This helper function is available for use by single-precision float
// arithmetic implementations to handle propagating NaNs from the input
// operands to the output, in a way that matches Arm hardware FP.
//
// On input, a and b are floating-point numbers in IEEE 754 encoding, and at
// least one of them must be a NaN. The return value is the correct output NaN.
//
// A signalling NaN in the input (with bit 22 clear) takes priority over any
// quiet NaN, and is adjusted on return by setting bit 22 to make it quiet. If
// both inputs are the same type of NaN then the first input takes priority:
// the input a is used instead of b.
//
//===----------------------------------------------------------------------===//

#include <stdint.h>

uint32_t __compiler_rt_fnan2(uint32_t a, uint32_t b) {
  // Classification trick: shift each input left by one to drop the sign bit,
  // then add 1 at the position where the quiet/signalling bit now sits
  // (bit 23). After that:
  //   - a signalling NaN yields a key strictly above 0xff800000;
  //   - a quiet NaN wraps around and yields a key strictly below 0x00800000;
  //   - anything else (including infinity, whose key is exactly 0xff800000)
  //     lands in between.
  const uint32_t quiet_bit = 0x00400000;
  const uint32_t key_a = (a << 1) + 0x00800000;
  const uint32_t key_b = (b << 1) + 0x00800000;

  const int a_is_signalling = key_a > 0xff800000;
  const int b_is_signalling = key_b > 0xff800000;
  const int a_is_quiet = key_a < 0x00800000;

  // Signalling NaNs win over quiet ones, with a taking priority over b. The
  // returned value has its quiet bit set.
  if (a_is_signalling)
    return a | quiet_bit;
  if (b_is_signalling)
    return b | quiet_bit;

  // No signalling NaN: prefer a if it is a quiet NaN. Otherwise b is expected
  // to be the quiet NaN, since at least one input must be a NaN.
  return a_is_quiet ? a : b;
}
62 changes: 62 additions & 0 deletions compiler-rt/lib/builtins/arm/fnorm2.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
//===-- fnorm2.c - Handle single-precision denormal inputs to binary op ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This helper function is available for use by single-precision float
// arithmetic implementations, to handle denormal inputs on entry by
// renormalizing the mantissa and modifying the exponent to match.
//
//===----------------------------------------------------------------------===//

#include <stdint.h>

// Structure containing the function's inputs and outputs.
//
// On entry: a, b are two input floating-point numbers, still in IEEE 754
// encoding. expa and expb are the 8-bit exponents of those numbers, extracted
// and shifted down to the low 8 bits of the word, with no other change.
// Neither value should be zero, or have the maximum exponent (indicating an
// infinity or NaN).
//
// On exit: each of a and b contains the mantissa of the input value, with the
// leading 1 bit made explicit, and shifted up to the top of the word. If expa
// was zero (indicating that a was denormal) then it is now represented as a
// normalized number with an out-of-range exponent (zero or negative). The same
// applies to expb and b.
struct fnorm2 {
  uint32_t a, b, expa, expb;
};

// Normalize a single operand in place.
//
// On entry *mantissa holds the IEEE 754 encoding of the value and *exponent
// its extracted exponent field. On exit *mantissa has its leading 1 in bit 31
// and, if the input was denormal, *exponent has been replaced by the matching
// out-of-range exponent.
static void fnorm2_normalize(uint32_t *mantissa, uint32_t *exponent) {
  // Shift the mantissa to the right place to follow a leading 1 in the top
  // bit, if there is one. This also discards the sign bit and all but the
  // lowest bit of the exponent field.
  *mantissa <<= 8;

  if (*exponent == 0) {
    // Denormal: decide how much further up to shift the mantissa so that its
    // leading 1 reaches the top bit, and adjust the exponent to match. The
    // caller guarantees the value is nonzero, so the argument to
    // __builtin_clz is nonzero here.
    uint32_t shift = __builtin_clz(*mantissa);
    *mantissa <<= shift;
    // Computed in unsigned arithmetic, so a negative exponent is stored in
    // wrapped (two's complement) form.
    *exponent = 1 - shift;
  } else {
    // Normal number: leave the mantissa in its current position, and OR in
    // the explicit leading 1.
    *mantissa |= 0x80000000;
  }
}

// Renormalize both operands described by *values; see struct fnorm2 for the
// entry and exit conditions.
void __compiler_rt_fnorm2(struct fnorm2 *values) {
  fnorm2_normalize(&values->a, &values->expa);
  fnorm2_normalize(&values->b, &values->expb);
}
78 changes: 78 additions & 0 deletions compiler-rt/lib/builtins/arm/funder.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
//===-- funder.c - Handle single-precision floating-point underflow -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This helper function is available for use by single-precision float
// arithmetic implementations to handle underflowed output values, if they were
// computed in the form of a normalized mantissa and an out-of-range exponent.
//
// On input: x should be a complete IEEE 754 floating-point value representing
// the desired output scaled up by 2^192 (the same value that would have been
// passed to an underflow trap handler in IEEE 754:1985).
//
// This isn't enough information to re-round to the correct output denormal
// without also knowing whether x itself has already been rounded, and which
// way. 'errsign' gives this information, by indicating the sign of the value
// (true result - x). That is, if errsign > 0 it means the true value was
// larger (x was rounded down); if errsign < 0 then x was rounded up; if
// errsign == 0 then x represents the _exact_ desired output value.
//
//===----------------------------------------------------------------------===//

#include <stdint.h>

#define SIGNBIT 0x80000000 // sign bit of the 32-bit encoding
#define MANTSIZE 23        // number of stored mantissa bits
#define BIAS 0xc0          // exponent offset applied to the scaled-up input

uint32_t __compiler_rt_funder(uint32_t x, uint32_t errsign) {
  const uint32_t sign_only = x & SIGNBIT;
  const uint32_t biased_exp = (x << 1) >> 24;

  // An exponent of 0xc1 or more means a normalised number arrived here by
  // mistake: strip the 0xc0 exponent bias and return it unchanged otherwise.
  if (biased_exp > BIAS)
    return x - (BIAS << MANTSIZE);

  // Number of low-order bits that fall off when moving to a denormal.
  const uint32_t dropped = BIAS + 1 - biased_exp;

  // If the implicit leading 1 ends up at least two bits below the lowest
  // mantissa bit of a denormal, the result rounds to (signed) zero no matter
  // what the remaining bits are.
  if (dropped > MANTSIZE + 1)
    return sign_only;

  // Build the full mantissa, explicit leading bit included, at the top of the
  // word, then nudge it by one unit in the direction of the true result so
  // that an earlier rounding of x cannot cause a wrong double rounding.
  const uint32_t err_is_negative = errsign >> 31;
  const uint32_t err_is_positive = (-errsign) >> 31;
  uint32_t full = (x << 8) | 0x80000000;
  full = full - err_is_negative + err_is_positive;

  // Split into the bits that stay in the output and the bits shifted off the
  // bottom. The defaults cover the special case where the whole of 'full' is
  // shifted off the bottom of the word.
  uint32_t kept = 0;
  uint32_t remainder = full;
  if (dropped != MANTSIZE + 1) {
    const uint32_t down = 8 + dropped;
    kept = full >> down;
    remainder = full << (32 - down);
  }

  // Re-round to nearest: round up when the top shifted-off bit is set, and in
  // the exact halfway case clear the low bit so the result is even.
  const uint32_t half = 0x80000000;
  if (remainder >= half) {
    kept++;
    if (remainder == half)
      kept &= ~1u;
  }

  return sign_only | kept;
}
Loading