Skip to content

Commit 5efce73

Browse files
authored
[compiler-rt][ARM] Optimized mulsf3 and divsf3 (#168394)
(Reland of #161546, fixing three build and test issues.) This commit adds optimized assembly versions of single-precision float multiplication and division. Both functions are implemented in a style that can be assembled as either Arm or Thumb2; for multiplication, a separate implementation is provided for Thumb1. Extensive new tests are also added for multiplication and division. These implementations can be removed from the build by setting the CMake variable COMPILER_RT_ARM_OPTIMIZED_FP=OFF. Outlying parts of the functionality that are not on the fast path, such as NaN handling and underflow, are handled in helper functions written in C. These can be shared between the Arm/Thumb2 and Thumb1 implementations, and also reused by other optimized assembly functions we hope to add in the future.
1 parent 200793a commit 5efce73

File tree

11 files changed

+2484
-95
lines changed

11 files changed

+2484
-95
lines changed
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
# Helper function to find out whether the assembler supports a particular
# command-line flag. You'd like to use the standard check_compiler_flag(), but
# that only supports a fixed list of languages, and ASM isn't one of them. So
# we do it ourselves, by trying to assemble an empty source file.
#
# Arguments:
#   outvar - name of the variable handed to try_compile() as its result
#            variable. If a variable of that name is already defined, the
#            whole check is skipped and nothing is printed.
#   flag   - the command-line flag to test, e.g. -mimplicit-it=always.
#
# The CHECK_START / CHECK_PASS / CHECK_FAIL progress messages are suppressed
# when CMAKE_REQUIRED_QUIET is set.
#
# Example:
#   check_assembler_flag(COMPILER_RT_HAS_MIMPLICIT_IT -mimplicit-it=always)

function(check_assembler_flag outvar flag)
  if(NOT DEFINED "${outvar}")
    if(NOT CMAKE_REQUIRED_QUIET)
      message(CHECK_START "Checking for assembler flag ${flag}")
    endif()

    # Stop try_compile from attempting to link the result of the assembly, so
    # that we don't depend on having a working linker, and also don't have to
    # figure out what special symbol like _start needs to be defined in the
    # test input.
    #
    # This change is made within the dynamic scope of this function, so
    # CMAKE_TRY_COMPILE_TARGET_TYPE will be restored to its previous value on
    # return.
    set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY)

    # Try to assemble an empty file with a .S name, using the provided flag.
    set(asm_source_file
      "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CheckAssemblerFlag.S")
    # file(WRITE) replaces the deprecated write_file() command; it creates the
    # (empty) test input, overwriting any previous copy.
    file(WRITE "${asm_source_file}" "")
    # The flag under test is forwarded to the test project through the
    # COMPILE_DEFINITIONS argument of try_compile().
    try_compile(${outvar}
      "${CMAKE_BINARY_DIR}"
      SOURCES "${asm_source_file}"
      COMPILE_DEFINITIONS ${flag})

    if(NOT CMAKE_REQUIRED_QUIET)
      if(${outvar})
        message(CHECK_PASS "Accepted")
      else()
        message(CHECK_FAIL "Not accepted")
      endif()
    endif()
  endif()
endfunction()

compiler-rt/lib/builtins/CMakeLists.txt

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ endif()
6060
include(builtin-config-ix)
6161
include(CMakeDependentOption)
6262
include(CMakePushCheckState)
63+
include(CheckAssemblerFlag)
6364

6465
option(COMPILER_RT_BUILTINS_HIDE_SYMBOLS
6566
"Do not export any symbols from the static library." ON)
@@ -423,6 +424,40 @@ set(arm_or_thumb2_base_SOURCES
423424
${GENERIC_SOURCES}
424425
)
425426

427+
# User-facing switch for the hand-written Arm floating-point routines.
option(COMPILER_RT_ARM_OPTIMIZED_FP
  "On 32-bit Arm, use optimized assembly implementations of FP arithmetic. Likely to increase code size, but be faster." ON)

# Sources for the optimized Arm/Thumb2 FP routines. This stays empty unless
# the option is enabled, BUILTIN_SUPPORTED_ARCH matches "arm", and a usable
# spelling of the -mimplicit-it=always assembler flag has been found.
set(arm_or_thumb2_optimized_fp_SOURCES)
# NOTE(review): the regex "arm" matches any arch name containing that
# substring - confirm that this is the intended set of architectures.
if(COMPILER_RT_ARM_OPTIMIZED_FP AND BUILTIN_SUPPORTED_ARCH MATCHES "arm")
  # Start from "no usable flag"; the probes below overwrite this on success.
  set(implicit_it_flag "")

  # Probe the bare spelling first; only if that is rejected, probe the
  # -Wa, spelling.
  check_assembler_flag(COMPILER_RT_HAS_MIMPLICIT_IT -mimplicit-it=always)
  if(COMPILER_RT_HAS_MIMPLICIT_IT)
    set(implicit_it_flag -mimplicit-it=always)
  else()
    check_assembler_flag(
      COMPILER_RT_HAS_WA_MIMPLICIT_IT -Wa,-mimplicit-it=always)
    if(COMPILER_RT_HAS_WA_MIMPLICIT_IT)
      set(implicit_it_flag -Wa,-mimplicit-it=always)
    endif()
  endif()

  if(NOT implicit_it_flag)
    # Neither spelling was accepted: leave the optimized source list empty.
    message(WARNING "Don't know how to set the -mimplicit-it=always flag in this assembler; not including Arm optimized implementations")
  else()
    # Only the assembly sources get the implicit-IT flag; the C helper
    # sources listed after them do not.
    set(assembly_files
      arm/mulsf3.S
      arm/divsf3.S)
    set_source_files_properties(${assembly_files}
      PROPERTIES COMPILE_OPTIONS ${implicit_it_flag})

    set(arm_or_thumb2_optimized_fp_SOURCES
      ${assembly_files}
      arm/fnan2.c
      arm/fnorm2.c
      arm/funder.c
    )
  endif()
endif()
460+
426461
set(arm_sync_SOURCES
427462
arm/sync_fetch_and_add_4.S
428463
arm/sync_fetch_and_add_8.S
@@ -456,6 +491,16 @@ set(thumb1_base_SOURCES
456491
${GENERIC_SOURCES}
457492
)
458493

494+
# When the optimized FP option is enabled, place the Thumb1 mulsf3 assembly
# and the C helper sources in front of the existing Thumb1 base sources.
if(COMPILER_RT_ARM_OPTIMIZED_FP)
  set(thumb1_optimized_fp_SOURCES
    arm/thumb1/mulsf3.S
    arm/fnan2.c
    arm/fnorm2.c
    arm/funder.c
  )
  set(thumb1_base_SOURCES
    ${thumb1_optimized_fp_SOURCES}
    ${thumb1_base_SOURCES}
  )
endif()
503+
459504
set(arm_EABI_RT_SOURCES
460505
arm/aeabi_cdcmp.S
461506
arm/aeabi_cdcmpeq_check_nan.c
@@ -567,6 +612,7 @@ if(MINGW)
567612
arm/aeabi_uldivmod.S
568613
arm/chkstk.S
569614
${arm_or_thumb2_base_SOURCES}
615+
${arm_or_thumb2_optimized_fp_SOURCES}
570616
${arm_sync_SOURCES}
571617
)
572618

@@ -577,6 +623,7 @@ elseif(NOT WIN32)
577623
# TODO the EABI sources should only be added to EABI targets
578624
set(arm_SOURCES
579625
${arm_or_thumb2_base_SOURCES}
626+
${arm_or_thumb2_optimized_fp_SOURCES}
580627
${arm_sync_SOURCES}
581628
${arm_EABI_SOURCES}
582629
${arm_Thumb1_SOURCES}

0 commit comments

Comments
 (0)