Skip to content

Commit 5eac061

Browse files
AlexMWellslgritz
authored andcommitted
fix(batched): codegen bug for compref with varying index (#1776)
Fix bug in batched codegen of compref when the index is varying. The loadvalue was left to defaulted to uniform load, which we can't store into a varying/wide result. Fixed similar bug in getmessage, and added check for spline knotcount to be uniform (although varying support could be added). This bug exposed a missed optimization in BatchedAnalyser where a early out (return) needs to change the conditional to be varying to support masking. However no check was being done to see if the loop control existed higher in the callstack. So now the execution scope tracks loop nesting depth to see if the current function is really in a loop or not. This allows the loop control flow to remain uniform (avoiding masking and more complex control flow). Added nestloop-reg to testsuite to reproduce bug and verify its fixed. --------- Signed-off-by: Alex M. Wells <[email protected]>
1 parent b9907e1 commit 5eac061

File tree

6 files changed

+140
-16
lines changed

6 files changed

+140
-16
lines changed

src/cmake/testing.cmake

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -289,6 +289,7 @@ macro (osl_add_all_tests)
289289
metadata-braces min-reg miscmath missing-shader
290290
mix-reg
291291
named-components
292+
nestedloop-reg
292293
noise noise-cell
293294
noise-gabor noise-gabor2d-filter noise-gabor3d-filter
294295
noise-gabor-reg

src/liboslexec/batched_analysis.cpp

Lines changed: 61 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -838,11 +838,12 @@ class FunctionTreeTracker {
838838
std::vector<Position> m_function_stack;
839839
std::vector<Position> m_before_if_block_stack;
840840
std::vector<Position> m_after_if_block_stack;
841+
std::vector<int> m_loop_depth_stack;
841842

842843
public:
843844
OSL_FORCEINLINE FunctionTreeTracker() : m_top_of_stack(end_pos())
844845
{
845-
push_function_call();
846+
push_function_call(0);
846847
}
847848

848849
FunctionTreeTracker(const FunctionTreeTracker&) = delete;
@@ -977,11 +978,12 @@ class FunctionTreeTracker {
977978

978979
OSL_FORCEINLINE Iterator end() const { return Iterator(*this, end_pos()); }
979980

980-
OSL_FORCEINLINE void push_function_call()
981+
OSL_FORCEINLINE void push_function_call(int loop_depth_at_function)
981982
{
982983
OSL_DEV_ONLY(std::cout << "DependencyTreeTracker push_function_call"
983984
<< std::endl);
984985
m_function_stack.push_back(m_top_of_stack);
986+
m_loop_depth_stack.push_back(loop_depth_at_function);
985987
}
986988

987989
OSL_FORCEINLINE void push_if_block()
@@ -1062,7 +1064,16 @@ class FunctionTreeTracker {
10621064

10631065
OSL_FORCEINLINE Position top_pos() const { return m_top_of_stack; }
10641066

1065-
OSL_FORCEINLINE void pop_function_call()
1067+
OSL_FORCEINLINE int loop_depth_at_start_of_call()
1068+
{
1069+
if (m_loop_depth_stack.empty()) {
1070+
return 0;
1071+
} else {
1072+
return m_loop_depth_stack.back();
1073+
}
1074+
}
1075+
1076+
OSL_FORCEINLINE void pop_function_call(int loop_depth_at_function)
10661077
{
10671078
OSL_DEV_ONLY(std::cout << "DependencyTreeTracker pop_function_call"
10681079
<< std::endl);
@@ -1090,6 +1101,9 @@ class FunctionTreeTracker {
10901101
process_exit(early_out.dtt_pos);
10911102
}
10921103
}
1104+
1105+
OSL_ASSERT(m_loop_depth_stack.back() == loop_depth_at_function);
1106+
m_loop_depth_stack.pop_back();
10931107
}
10941108
};
10951109

@@ -1214,8 +1228,11 @@ class LoopStack {
12141228

12151229
std::vector<LoopInfo> m_loop_info_by_depth;
12161230

1231+
bool is_inside_loop() const { return depth() != 0; }
1232+
12171233
public:
1218-
bool is_inside_loop() const { return !m_loop_info_by_depth.empty(); }
1234+
int depth() const { return m_loop_info_by_depth.size(); }
1235+
12191236

12201237
int current_loop_op_index() const
12211238
{
@@ -1603,7 +1620,7 @@ struct Analyzer {
16031620

16041621
OSL_NOINLINE void make_loops_control_flow_depend_on_early_out_conditions()
16051622
{
1606-
// Need change the loop control flow which is dependent upon
1623+
// Need to change the loop control flow which is dependent upon
16071624
// a conditional. By making a circular dependency between the this
16081625
// [return|exit|break|continue] operation and the conditionals value,
16091626
// any varying values in the conditional controlling
@@ -1986,9 +2003,11 @@ struct Analyzer {
19862003
// as the current block, there was no conditionals involved
19872004
OSL_DEV_ONLY(std::cout << " FUNCTION CALL BLOCK BEGIN"
19882005
<< std::endl);
1989-
m_execution_scope_stack.push_function_call();
2006+
m_execution_scope_stack.push_function_call(
2007+
m_loop_stack.depth());
19902008
discover_symbols_between(op_index + 1, opcode.jump(0));
1991-
m_execution_scope_stack.pop_function_call();
2009+
m_execution_scope_stack.pop_function_call(
2010+
m_loop_stack.depth());
19922011
OSL_DEV_ONLY(std::cout << " FUNCTION CALL BLOCK END"
19932012
<< std::endl);
19942013

@@ -2020,12 +2039,16 @@ struct Analyzer {
20202039
m_execution_scope_stack.process_return(
20212040
m_conditional_symbol_stack.top_pos());
20222041

2023-
// The return will need change the loop control flow which is dependent upon
2042+
// IF AND ONLY IF, the current loop control exists inside
2043+
// the current function, because the return should only early
2044+
// out of those loops, not any higher in the call stack.
2045+
// The return will need to change the loop control flow which is dependent upon
20242046
// a conditional. By making a circular dependency between the return operation
20252047
// and the conditionals value, any varying values in the conditional controlling
20262048
// the return should flow back to the loop control variable, which might need to
20272049
// be varying so allow lanes to terminate the loop independently
2028-
if (m_loop_stack.is_inside_loop()) {
2050+
if (m_loop_stack.depth()
2051+
> m_execution_scope_stack.loop_depth_at_start_of_call()) {
20292052
make_loops_control_flow_depend_on_early_out_conditions();
20302053
}
20312054
}
@@ -2037,12 +2060,18 @@ struct Analyzer {
20372060
m_execution_scope_stack.process_exit(
20382061
m_conditional_symbol_stack.top_pos());
20392062

2040-
// The exit will need change the loop control flow which is dependent upon
2063+
// IF AND ONLY IF, the current loop control exists inside
2064+
// the current function, because the return should only early
2065+
// out of those loops, not any higher in the call stack.
2066+
// A different mechanism will look for early outs when
2067+
// unwinding up the callstack.
2068+
// The exit will need to change the loop control flow which is dependent upon
20412069
// a conditional. By making a circular dependency between the exit operation
20422070
// and the conditionals value, any varying values in the conditional controlling
20432071
// the exit should flow back to the loop control variable, which might need to
20442072
// be varying so allow lanes to terminate the loop independently
2045-
if (m_loop_stack.is_inside_loop()) {
2073+
if (m_loop_stack.depth()
2074+
> m_execution_scope_stack.loop_depth_at_start_of_call()) {
20462075
make_loops_control_flow_depend_on_early_out_conditions();
20472076
}
20482077
}
@@ -2095,6 +2124,10 @@ struct Analyzer {
20952124
|| symbol_to_be_varying->typespec().is_closure()
20962125
|| symbol_to_be_varying->connected()
20972126
|| symbol_to_be_varying->connected_down());
2127+
OSL_DEV_ONLY(std::cout << "<<<< make_varying symbol: "
2128+
<< symbol_to_be_varying->unmangled().c_str()
2129+
<< " force=" << force << std::endl);
2130+
20982131
symbol_to_be_varying->make_varying();
20992132
auto range = m_symbols_dependent_upon.equal_range(
21002133
symbol_to_be_varying);
@@ -2107,6 +2140,9 @@ struct Analyzer {
21072140
recursively_mark_varying(dependent_symbol);
21082141
}
21092142
};
2143+
OSL_DEV_ONLY(std::cout << ">>>> end make_varying symbol: "
2144+
<< symbol_to_be_varying->unmangled().c_str()
2145+
<< std::endl);
21102146
}
21112147
};
21122148

@@ -2639,6 +2675,8 @@ struct Analyzer {
26392675

26402676
OSL_NOINLINE void push_varying_of_shader_globals()
26412677
{
2678+
OSL_DEV_ONLY(std::cout << "push_varying_of_shader_globals begin"
2679+
<< std::endl);
26422680
for (auto&& s : inst()->symbols()) {
26432681
if (s.symtype() == SymTypeGlobal) {
26442682
// TODO: now that symbol has is_uniform()
@@ -2659,10 +2697,14 @@ struct Analyzer {
26592697
// so force their dependents to get marked
26602698
recursively_mark_varying(sym_ptr, true /*force*/);
26612699
}
2700+
OSL_DEV_ONLY(std::cout << "push_varying_of_shader_globals end"
2701+
<< std::endl);
26622702
}
26632703

26642704
OSL_NOINLINE void make_renderer_outputs_varying()
26652705
{
2706+
OSL_DEV_ONLY(std::cout << "make_renderer_outputs_varying begin"
2707+
<< std::endl);
26662708
// Mark all output parameters as varying to catch
26672709
// output parameters written to by uniform variables,
26682710
// as nothing would have made them varying, however as
@@ -2679,10 +2721,14 @@ struct Analyzer {
26792721
}
26802722
}
26812723
}
2724+
OSL_DEV_ONLY(std::cout << "make_renderer_outputs_varying end"
2725+
<< std::endl);
26822726
}
26832727

26842728
OSL_NOINLINE void make_interpolated_parameters_varying()
26852729
{
2730+
OSL_DEV_ONLY(std::cout << "make_interpolated_parameters_varying begin"
2731+
<< std::endl);
26862732
// Mark all interpolated parameters as varying,
26872733
// As we expect interpolated data, get_userdata will
26882734
// only provide varying values, so we must mark
@@ -2694,10 +2740,13 @@ struct Analyzer {
26942740
recursively_mark_varying(&s);
26952741
}
26962742
}
2743+
OSL_DEV_ONLY(std::cout << "make_interpolated_parameters_varying end"
2744+
<< std::endl);
26972745
}
26982746

26992747
OSL_NOINLINE void make_closures_varying()
27002748
{
2749+
OSL_DEV_ONLY(std::cout << "make_closures_varying begin" << std::endl);
27012750
// We assume that closures are always stored as varying pointers
27022751
FOREACH_SYM(Symbol & s, inst())
27032752
{
@@ -2707,6 +2756,7 @@ struct Analyzer {
27072756
recursively_mark_varying(&s);
27082757
}
27092758
}
2759+
OSL_DEV_ONLY(std::cout << "make_closures_varying end" << std::endl);
27102760
}
27112761

27122762
OSL_NOINLINE void push_varying_of_upstream_connections()

src/liboslexec/batched_llvm_gen.cpp

Lines changed: 22 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2287,7 +2287,8 @@ LLVMGEN(llvm_gen_compref)
22872287

22882288
bool op_is_uniform = Result.is_uniform();
22892289

2290-
llvm::Value* c = rop.llvm_load_value(Index);
2290+
llvm::Value* c = rop.llvm_load_value(Index, /*deriv=*/0, /*component=*/0,
2291+
TypeDesc::UNKNOWN, Index.is_uniform());
22912292

22922293
if (Index.is_uniform()) {
22932294
if (rop.inst()->master()->range_checking()) {
@@ -6757,13 +6758,18 @@ LLVMGEN(llvm_gen_getmessage)
67576758
rop.ll.call_function(rop.build_name(FuncSpec("getmessage").mask()),
67586759
args);
67596760
} else {
6760-
llvm::Value* nameVal = rop.llvm_load_value(Name);
6761+
llvm::Value* nameVal
6762+
= rop.llvm_load_value(Name, /*deriv=*/0, /*component=*/0,
6763+
TypeDesc::UNKNOWN, nameVal_is_uniform);
67616764
if (nameVal_is_uniform) {
67626765
args[nameArgumentIndex] = nameVal;
67636766
}
67646767

6765-
llvm::Value* sourceVal = has_source ? rop.llvm_load_value(Source)
6766-
: rop.ll.constant(ustring());
6768+
llvm::Value* sourceVal
6769+
= has_source
6770+
? rop.llvm_load_value(Source, /*deriv=*/0, /*component=*/0,
6771+
TypeDesc::UNKNOWN, sourceVal_is_uniform)
6772+
: rop.ll.constant(ustring());
67676773
if (sourceVal_is_uniform) {
67686774
args[sourceArgumentIndex] = sourceVal;
67696775
}
@@ -7001,6 +7007,16 @@ LLVMGEN(llvm_gen_spline)
70017007
&& (!has_knot_count
70027008
|| (has_knot_count && Knot_count.typespec().is_int())));
70037009

7010+
if (has_knot_count && !Knot_count.is_uniform()) {
7011+
// TODO: varying knot count support could be added below as we already
7012+
// have a loop to handle varying spline basis. Just need to add tests
7013+
// to exercise and verify functions properly, until then...
7014+
rop.shadingcontext()->errorfmt(
7015+
"spline nknots parameter is varying, batched code gen requires a constant or uniform nknots, called from ({}:{})",
7016+
op.sourcefile(), op.sourceline());
7017+
return false;
7018+
}
7019+
70047020
bool op_is_uniform = Spline.is_uniform() && Value.is_uniform()
70057021
&& Knots.is_uniform();
70067022

@@ -7051,7 +7067,8 @@ LLVMGEN(llvm_gen_spline)
70517067
args.push_back(rop.llvm_void_ptr(Value)); // make things easy
70527068
args.push_back(rop.llvm_void_ptr(Knots));
70537069
if (has_knot_count)
7054-
args.push_back(rop.llvm_load_value(Knot_count));
7070+
args.push_back(rop.llvm_load_value(
7071+
Knot_count)); // TODO: add support for varying Knot_count
70557072
else
70567073
args.push_back(rop.ll.constant((int)Knots.typespec().arraylength()));
70577074
args.push_back(rop.ll.constant((int)Knots.typespec().arraylength()));

testsuite/nestedloop-reg/BATCHED_REGRESSION

Whitespace-only changes.

testsuite/nestedloop-reg/run.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
#!/usr/bin/env python
2+
3+
# Copyright Contributors to the Open Shading Language project.
4+
# SPDX-License-Identifier: BSD-3-Clause
5+
# https://github.com/AcademySoftwareFoundation/OpenShadingLanguage
6+
7+
8+
###############################
9+
#
10+
###############################
11+
command += testshade("-t 1 -g 32 32 -od uint8 test -o cout out.tif ")
12+
outputs.append ("out.tif")
13+
14+
# expect a few LSB failures
15+
failthresh = 0.008
16+
failpercent = 3
17+

testsuite/nestedloop-reg/test.osl

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
// Copyright Contributors to the Open Shading Language project.
2+
// SPDX-License-Identifier: BSD-3-Clause
3+
// https://github.com/AcademySoftwareFoundation/OpenShadingLanguage
4+
5+
float limit_early_return(float channel) {
6+
if (channel > 1.0) {
7+
//exit();
8+
return 1.0;
9+
}
10+
return channel;
11+
}
12+
13+
float limit_single_return(float channel) {
14+
float result = channel;
15+
if (channel > 1.0) {
16+
result = 1.0;
17+
}
18+
return result;
19+
}
20+
21+
color soft_clip(color in_color)
22+
{
23+
color result;
24+
for (int i=0; i < 3; ++i)
25+
{
26+
#if 1
27+
result[i] = limit_early_return(in_color[i]);
28+
#else
29+
result[i] = limit_single_return(in_color[i]);
30+
#endif
31+
}
32+
return result;
33+
}
34+
35+
shader test (output color cout = 0)
36+
{
37+
color pixel = 2*P;
38+
cout = soft_clip(pixel);
39+
}

0 commit comments

Comments
 (0)