Skip to content

Commit c7d7ec2

Browse files
authored
feat: Add rs_allocate_closure free function. (#1944)
Adds a new free function, rs_allocate_closure, that allows the renderer services to provide memory for closure storage, for both built-in (add/mul) and user-defined closures. The existing osl_* closure handling functions now use this new free function. The CPU-side fallback is to call through to the existing closure pool implementation. The GPU-side fallback returns null. testshade/testrender provide an example implementation showing how to implement a stack-based closure pool for the GPU. --------- Signed-off-by: Curtis Black <[email protected]>
1 parent ee5ae28 commit c7d7ec2

22 files changed

+277
-591
lines changed

src/include/OSL/rs_free_function.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -316,6 +316,13 @@ OSL_RSOP OSL_HOSTDEVICE bool
316316
rs_trace_get(OSL::OpaqueExecContextPtr oec, OSL::ustringhash name,
317317
OSL::TypeDesc type, void* val, bool derivatives);
318318

319+
/// Allocates memory for a closure color. May return null if no memory could
320+
/// be allocated. It is the renderer's responsibility to clean up these
321+
/// allocations after a shader is run and the closures have been processed.
322+
OSL_RSOP OSL_HOSTDEVICE void*
323+
rs_allocate_closure(OSL::OpaqueExecContextPtr oec, size_t size,
324+
size_t alignment);
325+
319326
/// Report errors, warnings, printf, and fprintf.
320327
/// Fmtlib style format specifier is used (vs. printf style)
321328
/// Arguments are represented as EncodedTypes (encodedtypes.h) and

src/include/OSL/strdecls.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,10 @@
1414
# error Do not include this file unless STRDECL is defined
1515
#endif
1616

17+
// avoid naming conflict with MSVC macro
18+
#ifdef RGB
19+
# undef RGB
20+
#endif
1721

1822
STRDECL("", _emptystring_)
1923
STRDECL("camera", camera)

src/liboslexec/CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -194,7 +194,7 @@ if (USE_LLVM_BITCODE)
194194
EMBED_LLVM_BITCODE_IN_CPP ( "${llvm_ops_srcs}" "_host" "osl_llvm_compiled_ops" lib_src "" "${include_dirs}")
195195

196196
set (rs_dependent_ops_srcs
197-
opmatrix.cpp opfmt.cpp optexture.cpp pointcloud.cpp
197+
opmatrix.cpp opfmt.cpp optexture.cpp pointcloud.cpp opclosure.cpp
198198
)
199199
# Achieve the effect of absorbing osl_llvm_compiled_ops by adding its
200200
# sources to rs_dependent_ops_srcs which avoids having to do it at runtime.
@@ -217,6 +217,7 @@ if (USE_LLVM_BITCODE)
217217
${CMAKE_SOURCE_DIR}/src/liboslexec/opmatrix.cpp
218218
${CMAKE_SOURCE_DIR}/src/liboslexec/optexture.cpp
219219
${CMAKE_SOURCE_DIR}/src/liboslexec/pointcloud.cpp
220+
${CMAKE_SOURCE_DIR}/src/liboslexec/opclosure.cpp
220221
${CMAKE_SOURCE_DIR}/src/liboslnoise/gabornoise.cpp
221222
${CMAKE_SOURCE_DIR}/src/liboslnoise/simplexnoise.cpp
222223
)

src/liboslexec/builtindecl.h

Lines changed: 2 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -109,29 +109,14 @@
109109
DECL(osl_##name##_dvdvv, "xXXX")
110110

111111

112-
113-
#ifndef __CUDA_ARCH__
114-
DECL(osl_add_closure_closure, "CXCC")
115-
DECL(osl_mul_closure_float, "CXCf")
116-
DECL(osl_mul_closure_color, "CXCc")
117-
DECL(osl_allocate_closure_component, "CXii")
118-
DECL(osl_allocate_weighted_closure_component, "CXiiX")
119-
DECL(osl_closure_to_string, "sXC")
120-
DECL(osl_closure_to_ustringhash, "hXC")
121-
#else
122-
// TODO: Figure out why trying to match the signatures between host and device
123-
// definitions fails with 'LLVM had to make a cast' assertion failure.
124-
//
125-
// In the meantime, use a signature that matches the definitions in rend_lib.cu,
126-
// where void* is used instead of ClosureColor* and ShaderGlobals*.
127112
DECL(osl_add_closure_closure, "XXXX")
128113
DECL(osl_mul_closure_float, "XXXf")
129-
DECL(osl_mul_closure_color, "XXXc")
114+
DECL(osl_mul_closure_color, "XXXX")
130115
DECL(osl_allocate_closure_component, "XXii")
131116
DECL(osl_allocate_weighted_closure_component, "XXiiX")
132117
DECL(osl_closure_to_string, "sXX")
133118
DECL(osl_closure_to_ustringhash, "hXX")
134-
#endif
119+
135120
DECL(osl_format, "hh*")
136121
DECL(osl_gen_ustringhash_pod, "hs")
137122
DECL(osl_gen_ustring, "sh")

src/liboslexec/llvm_gen.cpp

Lines changed: 9 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -3863,19 +3863,15 @@ LLVMGEN(llvm_gen_closure)
38633863
id_int, size_int);
38643864
llvm::Value* comp_void_ptr = return_ptr;
38653865

3866-
// For the weighted closures, we need a surrounding "if" so that it's safe
3867-
// for osl_allocate_weighted_closure_component to return NULL (unless we
3868-
// know for sure that it's constant weighted and that the weight is
3869-
// not zero).
3870-
llvm::BasicBlock* next_block = NULL;
3871-
if (weighted && !(weight->is_constant() && !rop.is_zero(*weight))) {
3872-
llvm::BasicBlock* notnull_block = rop.ll.new_basic_block(
3873-
"non_null_closure");
3874-
next_block = rop.ll.new_basic_block("");
3875-
llvm::Value* cond = rop.ll.op_ne(return_ptr, rop.ll.void_ptr_null());
3876-
rop.ll.op_branch(cond, notnull_block, next_block);
3877-
// new insert point is nonnull_block
3878-
}
3866+
// We need a surrounding "if" so that it's safe for closure allocation to
3867+
// return NULL, either because it has zero weight, or renderer services ran
3868+
// out of memory in the closure pool.
3869+
llvm::BasicBlock* notnull_block = rop.ll.new_basic_block(
3870+
"non_null_closure");
3871+
llvm::BasicBlock* next_block = rop.ll.new_basic_block("");
3872+
llvm::Value* cond = rop.ll.op_ne(return_ptr, rop.ll.void_ptr_null());
3873+
rop.ll.op_branch(cond, notnull_block, next_block);
3874+
// new insert point is notnull_block
38793875

38803876
llvm::Value* comp_ptr
38813877
= rop.ll.ptr_cast(comp_void_ptr, rop.llvm_type_closure_component_ptr());

src/liboslexec/opclosure.cpp

Lines changed: 69 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -7,72 +7,120 @@
77

88
#include "oslexec_pvt.h"
99
#include <OSL/genclosure.h>
10+
#include <OSL/rs_free_function.h>
1011

1112

1213
OSL_NAMESPACE_BEGIN
1314
namespace pvt {
1415

1516

1617

17-
OSL_SHADEOP const ClosureColor*
18-
osl_add_closure_closure(ShaderGlobals* sg, const ClosureColor* a,
19-
const ClosureColor* b)
18+
OSL_SHADEOP OSL_HOSTDEVICE const void*
19+
osl_add_closure_closure(OpaqueExecContextPtr oec, const void* a_,
20+
const void* b_)
2021
{
22+
const ClosureColor* a = (const ClosureColor*)a_;
23+
const ClosureColor* b = (const ClosureColor*)b_;
2124
if (a == NULL)
2225
return b;
2326
if (b == NULL)
2427
return a;
25-
return sg->context->closure_add_allot(a, b);
28+
ClosureAdd* add = (ClosureAdd*)rs_allocate_closure(oec, sizeof(ClosureAdd),
29+
alignof(ClosureAdd));
30+
if (add) {
31+
add->id = ClosureColor::ADD;
32+
add->closureA = a;
33+
add->closureB = b;
34+
}
35+
return add;
2636
}
2737

2838

29-
OSL_SHADEOP const ClosureColor*
30-
osl_mul_closure_color(ShaderGlobals* sg, ClosureColor* a, const Color3* w)
39+
OSL_SHADEOP OSL_HOSTDEVICE const void*
40+
osl_mul_closure_color(OpaqueExecContextPtr oec, const void* a_, const void* w_)
3141
{
42+
const ClosureColor* a = (const ClosureColor*)a_;
43+
const Color3* w = (const Color3*)w_;
3244
if (a == NULL)
3345
return NULL;
3446
if (w->x == 0.0f && w->y == 0.0f && w->z == 0.0f)
3547
return NULL;
3648
if (w->x == 1.0f && w->y == 1.0f && w->z == 1.0f)
3749
return a;
38-
return sg->context->closure_mul_allot(*w, a);
50+
ClosureMul* mul = (ClosureMul*)rs_allocate_closure(oec, sizeof(ClosureMul),
51+
alignof(ClosureMul));
52+
if (mul) {
53+
mul->id = ClosureColor::MUL;
54+
mul->weight = *w;
55+
mul->closure = a;
56+
}
57+
return mul;
3958
}
4059

4160

42-
OSL_SHADEOP const ClosureColor*
43-
osl_mul_closure_float(ShaderGlobals* sg, ClosureColor* a, float w)
61+
OSL_SHADEOP OSL_HOSTDEVICE const void*
62+
osl_mul_closure_float(OpaqueExecContextPtr oec, const void* a_, float w)
4463
{
64+
const ClosureColor* a = (const ClosureColor*)a_;
4565
if (a == NULL)
4666
return NULL;
4767
if (w == 0.0f)
4868
return NULL;
4969
if (w == 1.0f)
5070
return a;
51-
return sg->context->closure_mul_allot(w, a);
71+
ClosureMul* mul = (ClosureMul*)rs_allocate_closure(oec, sizeof(ClosureMul),
72+
alignof(ClosureMul));
73+
if (mul) {
74+
mul->id = ClosureColor::MUL;
75+
mul->weight = Color3(w);
76+
mul->closure = a;
77+
}
78+
return mul;
5279
}
5380

5481

55-
OSL_SHADEOP ClosureComponent*
56-
osl_allocate_closure_component(ShaderGlobals* sg, int id, int size)
82+
OSL_SHADEOP OSL_HOSTDEVICE void*
83+
osl_allocate_closure_component(OpaqueExecContextPtr oec, int id, int size)
5784
{
58-
return sg->context->closure_component_allot(id, size, Color3(1.0f));
85+
// Allocate the component and the mul back to back
86+
const size_t needed = sizeof(ClosureComponent) + size;
87+
ClosureComponent* comp
88+
= (ClosureComponent*)rs_allocate_closure(oec, needed,
89+
alignof(ClosureComponent));
90+
if (comp) {
91+
comp->id = id;
92+
comp->w = Color3(1.0f);
93+
}
94+
return comp;
5995
}
6096

6197

6298

63-
OSL_SHADEOP ClosureColor*
64-
osl_allocate_weighted_closure_component(ShaderGlobals* sg, int id, int size,
65-
const Color3* w)
99+
OSL_SHADEOP OSL_HOSTDEVICE void*
100+
osl_allocate_weighted_closure_component(OpaqueExecContextPtr oec, int id,
101+
int size, const void* w_)
66102
{
103+
const Color3* w = (const Color3*)w_;
67104
if (w->x == 0.0f && w->y == 0.0f && w->z == 0.0f)
68105
return NULL;
69-
return sg->context->closure_component_allot(id, size, *w);
106+
// Allocate the component and the mul back to back
107+
const size_t needed = sizeof(ClosureComponent) + size;
108+
ClosureComponent* comp
109+
= (ClosureComponent*)rs_allocate_closure(oec, needed,
110+
alignof(ClosureComponent));
111+
if (comp) {
112+
comp->id = id;
113+
comp->w = *w;
114+
}
115+
return comp;
70116
}
71117

72118
// Deprecated, remove when conversion from ustring to ustringhash is finished
73119
OSL_SHADEOP const char*
74-
osl_closure_to_string(ShaderGlobals* sg, ClosureColor* c)
120+
osl_closure_to_string(OpaqueExecContextPtr oec, const void* c_)
75121
{
122+
ShaderGlobals* sg = (ShaderGlobals*)oec;
123+
const ClosureColor* c = (const ClosureColor*)c_;
76124
// Special case for printing closures
77125
std::ostringstream stream;
78126
stream.imbue(std::locale::classic()); // force C locale
@@ -82,8 +130,10 @@ osl_closure_to_string(ShaderGlobals* sg, ClosureColor* c)
82130
}
83131

84132
OSL_SHADEOP ustringhash_pod
85-
osl_closure_to_ustringhash(ShaderGlobals* sg, ClosureColor* c)
133+
osl_closure_to_ustringhash(OpaqueExecContextPtr oec, const void* c_)
86134
{
135+
ShaderGlobals* sg = (ShaderGlobals*)oec;
136+
const ClosureColor* c = (const ClosureColor*)c_;
87137
// Special case for printing closures
88138
std::ostringstream stream;
89139
stream.imbue(std::locale::classic()); // force C locale

src/liboslexec/oslexec_pvt.h

Lines changed: 2 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -2242,49 +2242,11 @@ class OSLEXECPUBLIC ShadingContext {
22422242
}
22432243
#endif
22442244

2245-
ClosureComponent* closure_component_allot(int id, size_t prim_size,
2246-
const Color3& w)
2245+
void* allocate_closure(size_t size, size_t alignment)
22472246
{
2248-
// Allocate the component and the mul back to back
2249-
size_t needed = sizeof(ClosureComponent) + prim_size;
2250-
ClosureComponent* comp = (ClosureComponent*)m_closure_pool.alloc(
2251-
needed, alignof(ClosureComponent));
2252-
comp->id = id;
2253-
comp->w = w;
2254-
return comp;
2247+
return m_closure_pool.alloc(size, alignment);
22552248
}
22562249

2257-
ClosureMul* closure_mul_allot(const Color3& w, const ClosureColor* c)
2258-
{
2259-
ClosureMul* mul = (ClosureMul*)m_closure_pool.alloc(sizeof(ClosureMul),
2260-
alignof(ClosureMul));
2261-
mul->id = ClosureColor::MUL;
2262-
mul->weight = w;
2263-
mul->closure = c;
2264-
return mul;
2265-
}
2266-
2267-
ClosureMul* closure_mul_allot(float w, const ClosureColor* c)
2268-
{
2269-
ClosureMul* mul = (ClosureMul*)m_closure_pool.alloc(sizeof(ClosureMul),
2270-
alignof(ClosureMul));
2271-
mul->id = ClosureColor::MUL;
2272-
mul->weight.setValue(w, w, w);
2273-
mul->closure = c;
2274-
return mul;
2275-
}
2276-
2277-
ClosureAdd* closure_add_allot(const ClosureColor* a, const ClosureColor* b)
2278-
{
2279-
ClosureAdd* add = (ClosureAdd*)m_closure_pool.alloc(sizeof(ClosureAdd),
2280-
alignof(ClosureAdd));
2281-
add->id = ClosureColor::ADD;
2282-
add->closureA = a;
2283-
add->closureB = b;
2284-
return add;
2285-
}
2286-
2287-
22882250
/// Find the named symbol in the (already-executed!) stack of shaders of
22892251
/// the given use. If a layer is given, search just that layer. If no
22902252
/// layer is specified, priority is given to later layers over earlier

src/liboslexec/rs_fallback.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
#include <OSL/journal.h>
1010

11+
#include "oslexec_pvt.h"
1112

1213
// Fallback is to reroute calls back through the virtual function
1314
// based RendererServices from ShaderGlobals.
@@ -316,6 +317,18 @@ rs_trace_get(OSL::OpaqueExecContextPtr exec_ctx, OSL::ustringhash name,
316317
#endif
317318
}
318319

320+
OSL_RSOP OSL_HOSTDEVICE void*
321+
rs_allocate_closure(OSL::OpaqueExecContextPtr exec_ctx, size_t size,
322+
size_t alignment)
323+
{
324+
#ifndef __CUDA_ARCH__
325+
auto sg = get_sg(exec_ctx);
326+
return sg->context->allocate_closure(size, alignment);
327+
#else
328+
return nullptr;
329+
#endif
330+
}
331+
319332
OSL_RSOP OSL_HOSTDEVICE void
320333
rs_errorfmt(OSL::OpaqueExecContextPtr exec_ctx,
321334
OSL::ustringhash fmt_specification, int32_t count,

src/liboslexec/shadeimage.cpp

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -116,9 +116,6 @@ shade_image(ShadingSystem& shadingsys, ShaderGroup& group,
116116
// That also implies that our normal points to (0,0,1)
117117
sg.N = Vec3(0, 0, 1);
118118
sg.Ng = Vec3(0, 0, 1);
119-
// In our SimpleRenderer, the "renderstate" itself just a pointer to
120-
// the ShaderGlobals.
121-
// sg.renderstate = &sg;
122119
}
123120

124121
// Loop over all pixels in the image (in x and y)...

src/osltoy/osltoyrenderer.cpp

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -115,9 +115,6 @@ OSLToyRenderer::OSLToyRenderer()
115115
// That also implies that our normal points to (0,0,1)
116116
sg.N = Vec3(0, 0, 1);
117117
sg.Ng = Vec3(0, 0, 1);
118-
// In our SimpleRenderer, the "renderstate" itself just a pointer to
119-
// the ShaderGlobals.
120-
// sg.renderstate = &sg;
121118
}
122119

123120

0 commit comments

Comments
 (0)