-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[libclc] Fix commands in compile_to_bc are executed sequentially #130755
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
1f8b5bf
3181480
ea2f503
603255c
aed94cf
952e8c1
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,6 +1,8 @@ | ||
| # Compiles an OpenCL C - or assembles an LL file - to bytecode | ||
| # | ||
| # Arguments: | ||
| # * TARGET <string> | ||
| # Custom target to create | ||
| # * TRIPLE <string> | ||
| # Target triple for which to compile the bytecode file. | ||
| # * INPUT <string> | ||
|
|
@@ -17,7 +19,7 @@ | |
| function(compile_to_bc) | ||
| cmake_parse_arguments(ARG | ||
| "" | ||
| "TRIPLE;INPUT;OUTPUT" | ||
| "TARGET;TRIPLE;INPUT;OUTPUT" | ||
| "EXTRA_OPTS;DEPENDENCIES" | ||
| ${ARGN} | ||
| ) | ||
|
|
@@ -63,13 +65,15 @@ function(compile_to_bc) | |
| ${ARG_DEPENDENCIES} | ||
| DEPFILE ${ARG_OUTPUT}.d | ||
| ) | ||
| add_custom_target( ${ARG_TARGET} DEPENDS ${ARG_OUTPUT}${TMP_SUFFIX} ) | ||
|
|
||
| if( ${FILE_EXT} STREQUAL ".ll" ) | ||
| add_custom_command( | ||
| OUTPUT ${ARG_OUTPUT} | ||
| COMMAND ${llvm-as_exe} -o ${ARG_OUTPUT} ${ARG_OUTPUT}${TMP_SUFFIX} | ||
| DEPENDS ${llvm-as_target} ${ARG_OUTPUT}${TMP_SUFFIX} | ||
| ) | ||
| add_custom_target( ${ARG_TARGET}-as DEPENDS ${ARG_OUTPUT} ) | ||
| endif() | ||
| endfunction() | ||
|
|
||
|
|
@@ -227,6 +231,7 @@ function(add_libclc_builtin_set) | |
|
|
||
| set( bytecode_files ) | ||
| set( bytecode_ir_files ) | ||
| set( compile_tgts ) | ||
| foreach( file IN LISTS ARG_GEN_FILES ARG_LIB_FILES ) | ||
| # We need to take each file and produce an absolute input file, as well | ||
| # as a unique architecture-specific output file. We deal with a mix of | ||
|
|
@@ -256,19 +261,25 @@ function(add_libclc_builtin_set) | |
|
|
||
| get_filename_component( file_dir ${file} DIRECTORY ) | ||
|
|
||
| string( REPLACE "/" "-" replaced ${file} ) | ||
| set( tgt compile_tgt-${ARG_ARCH_SUFFIX}${replaced}) | ||
|
|
||
| compile_to_bc( | ||
| TARGET ${tgt} | ||
| TRIPLE ${ARG_TRIPLE} | ||
| INPUT ${input_file} | ||
| OUTPUT ${output_file} | ||
| EXTRA_OPTS -fno-builtin -nostdlib | ||
| "${ARG_COMPILE_FLAGS}" -I${CMAKE_CURRENT_SOURCE_DIR}/${file_dir} | ||
| DEPENDENCIES ${input_file_dep} | ||
| ) | ||
| list( APPEND compile_tgts ${tgt} ) | ||
|
|
||
| # Collect all files originating in LLVM IR separately | ||
| get_filename_component( file_ext ${file} EXT ) | ||
| if( ${file_ext} STREQUAL ".ll" ) | ||
| list( APPEND bytecode_ir_files ${output_file} ) | ||
| list( APPEND compile_tgts ${tgt}-as ) | ||
|
||
| else() | ||
| list( APPEND bytecode_files ${output_file} ) | ||
| endif() | ||
|
|
@@ -283,7 +294,7 @@ function(add_libclc_builtin_set) | |
|
|
||
| set( builtins_comp_lib_tgt builtins.comp.${ARG_ARCH_SUFFIX} ) | ||
| add_custom_target( ${builtins_comp_lib_tgt} | ||
| DEPENDS ${bytecode_files} | ||
| DEPENDS ${compile_tgts} | ||
|
||
| ) | ||
| set_target_properties( ${builtins_comp_lib_tgt} PROPERTIES FOLDER "libclc/Device IR/Comp" ) | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't think the (possibly) "temp" output (in the case of
.llfiles) should be given the honour of theARG_TARGETtarget. The final output of this function should unconditionally haveARG_TARGET. That saves doing-asworkarounds in other parts of the code. TheTARGETname the user passes should be definitive, in other words.Do we strictly need a target for the intermediate step too? If we do, we could have
${ARG_TARGET}${TMP_SUFFIX}here. Then non-IR files getARG_TARGETtarget, and then in the IR block below we createARG_TARGETfor that case.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I have simplified the code by removing ${ARG_TARGET}-as
The issue we observing is that .cl files are compiled sequentially. We don't see problem with .ll file, so we can leave it as-is.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah yes that's much simpler.
I'm half wondering if this isn't only for enabling parallel execution but is technically required for correct behaviour. I know CMake can behave strangely when only depending upon files and not targets, which is that we're doing before this PR. Though even with this PR, we're still implicitly depending on files coming out of the
llvm-asstep. That's what makes me a bit nervous. This is still better than what we've got so maybe we can keep an eye on it and see if we have any parallel build issues stemming from.llfiles, and we can adjust as needed.I think I'll also post a follow-up which handles empty arguments. I was going to ask about handling an empty
TARGETbut noticed there's nothing for emptyINPUTorOUTPUTso maybe that's best left handled later in one go.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
yeah, I agree