Skip to content

Commit c0f8462

Browse files
committed
Move closure pool to render state.
Signed-off-by: Curtis Black <curtis.w.black@gmail.com>
1 parent b112bb5 commit c0f8462

File tree

9 files changed

+76
-38
lines changed

9 files changed

+76
-38
lines changed

src/testrender/cuda/optix_raytracer.cu

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -48,16 +48,18 @@ __device__ __constant__ RenderParams render_params;
4848

4949

5050
static inline __device__ void
51-
execute_shader(ShaderGlobalsType& sg, const int shader_id, char* closure_pool)
51+
execute_shader(ShaderGlobalsType& sg, const int shader_id, StackClosurePool& closure_pool)
5252
{
5353
if (shader_id < 0) {
5454
// TODO: should probably never get here ...
5555
return;
5656
}
5757

58-
// Pack the "closure pool" into one of the ShaderGlobals pointers
59-
*(int*)&closure_pool[0] = 0;
60-
sg.renderstate = &closure_pool[0];
58+
closure_pool.reset();
59+
RenderState renderState;
60+
// TODO: renderState.context = ...
61+
renderState.closure_pool = &closure_pool;
62+
sg.renderstate = &renderState;
6163

6264
// Pack the pointers to the options structs in a faux "context",
6365
// which is a rough stand-in for the host ShadingContext.

src/testrender/simpleraytracer.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -945,7 +945,7 @@ SimpleRaytracer::eval_background(const Dual2<Vec3>& dir, ShadingContext* ctx,
945945
#ifndef __CUDACC__
946946
shadingsys->execute(*ctx, *m_shaders[backgroundShaderID].surf, sg);
947947
#else
948-
alignas(8) char closure_pool[256];
948+
StackClosurePool closure_pool;
949949
execute_shader(sg, render_params.bg_id, closure_pool);
950950
#endif
951951
return process_background_closure((const ClosureColor*)sg.Ci);
@@ -957,8 +957,8 @@ SimpleRaytracer::subpixel_radiance(float x, float y, Sampler& sampler,
957957
{
958958
#ifdef __CUDACC__
959959
// Scratch space for the output closures
960-
alignas(8) char closure_pool[256];
961-
alignas(8) char light_closure_pool[256];
960+
StackClosurePool closure_pool;
961+
StackClosurePool light_closure_pool;
962962
#endif
963963

964964
constexpr float inf = std::numeric_limits<float>::infinity();

src/testshade/cuda/optix_grid_renderer.cu

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ __raygen__()
9898
// networks, so there should be (at least) some mechanism to issue a
9999
// warning or error if the closure or param storage can possibly be
100100
// exceeded.
101-
alignas(8) char closure_pool[256];
101+
StackClosurePool closure_pool;
102102
alignas(8) char params[256];
103103

104104
OSL_CUDA::ShaderGlobals sg;
@@ -137,8 +137,11 @@ __raygen__()
137137
sg.object2common = reinterpret_cast<void*>(render_params.object2common);
138138

139139
// Pack the "closure pool" into one of the ShaderGlobals pointers
140-
*(int*)&closure_pool[0] = 0;
141-
sg.renderstate = &closure_pool[0];
140+
closure_pool.reset();
141+
RenderState renderState;
142+
// TODO: renderState.context = ...
143+
renderState.closure_pool = &closure_pool;
144+
sg.renderstate = &renderState;
142145

143146
// Run the OSL group and init functions
144147
if (render_params.fused_callable)

src/testshade/render_state.h

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
// All the the state free functions in rs_simplerend.cpp will need to do their job
1111
// NOTE: Additional data is here that will be used by rs_simplerend.cpp in future PR's
1212
// procedurally generating ShaderGlobals.
13-
struct RenderState {
13+
struct RenderContext {
1414
int xres;
1515
int yres;
1616
OSL::Matrix44 world_to_camera;
@@ -24,6 +24,35 @@ struct RenderState {
2424
void* journal_buffer;
2525
};
2626

27+
class StackClosurePool {
28+
alignas(8) char buffer[256];
29+
void* ptr;
30+
31+
public:
32+
StackClosurePool() { reset(); }
33+
34+
void reset()
35+
{
36+
ptr = &buffer[0];
37+
*(int*)ptr = 0;
38+
}
39+
40+
void* allocate(size_t size, size_t alignment)
41+
{
42+
uintptr_t p = OIIO::round_to_multiple_of_pow2((uintptr_t)ptr,
43+
alignment);
44+
ptr = (void*)(p + size);
45+
if (ptr <= &buffer[256])
46+
return p;
47+
return nullptr;
48+
}
49+
}
50+
51+
struct RenderState {
52+
RenderContext* context;
53+
StackClosurePool* closure_pool;
54+
};
55+
2756

2857
// Create constexpr hashes for all strings used by the free function renderer services.
2958
// NOTE: Actually ustring's should also be instantiated in host code someplace as well

src/testshade/rs_simplerend.cpp

Lines changed: 8 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ rs_get_inverse_matrix_space_time(OSL::OpaqueExecContextPtr ec,
6666
using OSL::Matrix44;
6767

6868

69-
auto rs = OSL::get_rs<RenderState>(ec);
69+
auto rs = OSL::get_rs<RenderState>(ec)->context;
7070
if (to == OSL::Hashes::camera || to == OSL::Hashes::screen
7171
|| to == OSL::Hashes::NDC || to == RS::Hashes::raster) {
7272
Matrix44 M { rs->world_to_camera };
@@ -372,11 +372,8 @@ rs_trace_get(OSL::OpaqueExecContextPtr ec, OSL::ustringhash name,
372372
OSL_RSOP OSL_HOSTDEVICE void*
373373
rs_allocate_closure(OSL::OpaqueExecContextPtr ec, size_t size, size_t alignment)
374374
{
375-
auto sg = (OSL::ShaderGlobals*)ec;
376-
uintptr_t ptr = OIIO::round_to_multiple_of_pow2((uintptr_t)sg->renderstate,
377-
alignment);
378-
sg->renderstate = (void*)(ptr + size);
379-
return (void*)ptr;
375+
auto rs = OSL::get_rs<RenderState>(ec);
376+
return rs->closure_pool->allocate(size, alignment);
380377
}
381378
#endif
382379

@@ -503,7 +500,7 @@ rs_get_attribute(OSL::OpaqueExecContextPtr oec, OSL::ustringhash_pod object_,
503500
auto object = OSL::ustringhash_from(object_);
504501
auto name = OSL::ustringhash_from(name_);
505502
const OSL::TypeDesc type = OSL::TypeDesc_from(_type);
506-
auto rs = OSL::get_rs<RenderState>(oec);
503+
auto rs = OSL::get_rs<RenderState>(oec)->context;
507504

508505
// The many branches in the code below handle the case where we don't know
509506
// the attribute name at compile time. In the case it is known, dead-code
@@ -648,7 +645,7 @@ rs_errorfmt(OSL::OpaqueExecContextPtr ec, OSL::ustringhash fmt_specification,
648645
int32_t arg_count, const OSL::EncodedType* argTypes,
649646
uint32_t argValuesSize, uint8_t* argValues)
650647
{
651-
auto rs = OSL::get_rs<RenderState>(ec);
648+
auto rs = OSL::get_rs<RenderState>(ec)->context;
652649

653650
OSL::journal::Writer jw { rs->journal_buffer };
654651
jw.record_errorfmt(OSL::get_thread_index(ec), OSL::get_shade_index(ec),
@@ -661,7 +658,7 @@ rs_warningfmt(OSL::OpaqueExecContextPtr ec, OSL::ustringhash fmt_specification,
661658
int32_t arg_count, const OSL::EncodedType* argTypes,
662659
uint32_t argValuesSize, uint8_t* argValues)
663660
{
664-
auto rs = OSL::get_rs<RenderState>(ec);
661+
auto rs = OSL::get_rs<RenderState>(ec)->context;
665662

666663
OSL::journal::Writer jw { rs->journal_buffer };
667664
jw.record_warningfmt(OSL::get_max_warnings_per_thread(ec),
@@ -676,7 +673,7 @@ rs_printfmt(OSL::OpaqueExecContextPtr ec, OSL::ustringhash fmt_specification,
676673
int32_t arg_count, const OSL::EncodedType* argTypes,
677674
uint32_t argValuesSize, uint8_t* argValues)
678675
{
679-
auto rs = OSL::get_rs<RenderState>(ec);
676+
auto rs = OSL::get_rs<RenderState>(ec)->context;
680677

681678
OSL::journal::Writer jw { rs->journal_buffer };
682679
jw.record_printfmt(OSL::get_thread_index(ec), OSL::get_shade_index(ec),
@@ -691,7 +688,7 @@ rs_filefmt(OSL::OpaqueExecContextPtr ec, OSL::ustringhash filename_hash,
691688
const OSL::EncodedType* argTypes, uint32_t argValuesSize,
692689
uint8_t* argValues)
693690
{
694-
auto rs = OSL::get_rs<RenderState>(ec);
691+
auto rs = OSL::get_rs<RenderState>(ec)->context;
695692

696693
OSL::journal::Writer jw { rs->journal_buffer };
697694
jw.record_filefmt(OSL::get_thread_index(ec), OSL::get_shade_index(ec),

src/testshade/simplerend.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1049,7 +1049,7 @@ SimpleRenderer::add_output(string_view varname_, string_view filename,
10491049

10501050

10511051
void
1052-
SimpleRenderer::export_state(RenderState& state) const
1052+
SimpleRenderer::export_state(RenderContext& state) const
10531053
{
10541054
state.xres = m_xres;
10551055
state.yres = m_yres;
@@ -1073,7 +1073,7 @@ SimpleRenderer::errorfmt(OSL::ShaderGlobals* sg,
10731073
const EncodedType* arg_types, uint32_t arg_values_size,
10741074
uint8_t* argValues)
10751075
{
1076-
RenderState* rs = reinterpret_cast<RenderState*>(sg->renderstate);
1076+
RenderContext* rs = reinterpret_cast<RenderState*>(sg->renderstate)->context;
10771077
OSL::journal::Writer jw { rs->journal_buffer };
10781078
jw.record_errorfmt(OSL::get_thread_index(sg), OSL::get_shade_index(sg),
10791079
fmt_specification, arg_count, arg_types, arg_values_size,
@@ -1086,7 +1086,7 @@ SimpleRenderer::warningfmt(OSL::ShaderGlobals* sg,
10861086
int32_t arg_count, const EncodedType* arg_types,
10871087
uint32_t arg_values_size, uint8_t* argValues)
10881088
{
1089-
RenderState* rs = reinterpret_cast<RenderState*>(sg->renderstate);
1089+
RenderContext* rs = reinterpret_cast<RenderState*>(sg->renderstate)->context;
10901090
OSL::journal::Writer jw { rs->journal_buffer };
10911091
jw.record_warningfmt(OSL::get_max_warnings_per_thread(sg),
10921092
OSL::get_thread_index(sg), OSL::get_shade_index(sg),
@@ -1102,7 +1102,7 @@ SimpleRenderer::printfmt(OSL::ShaderGlobals* sg,
11021102
const EncodedType* arg_types, uint32_t arg_values_size,
11031103
uint8_t* argValues)
11041104
{
1105-
RenderState* rs = reinterpret_cast<RenderState*>(sg->renderstate);
1105+
RenderContext* rs = reinterpret_cast<RenderState*>(sg->renderstate)->context;
11061106
OSL::journal::Writer jw { rs->journal_buffer };
11071107
jw.record_printfmt(OSL::get_thread_index(sg), OSL::get_shade_index(sg),
11081108
fmt_specification, arg_count, arg_types, arg_values_size,
@@ -1115,7 +1115,7 @@ SimpleRenderer::filefmt(OSL::ShaderGlobals* sg, OSL::ustringhash filename_hash,
11151115
const EncodedType* arg_types, uint32_t arg_values_size,
11161116
uint8_t* argValues)
11171117
{
1118-
RenderState* rs = reinterpret_cast<RenderState*>(sg->renderstate);
1118+
RenderContext* rs = reinterpret_cast<RenderState*>(sg->renderstate)->context;
11191119
OSL::journal::Writer jw { rs->journal_buffer };
11201120
jw.record_filefmt(OSL::get_thread_index(sg), OSL::get_shade_index(sg),
11211121
filename_hash, fmt_specification, arg_count, arg_types,

src/testshade/simplerend.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,7 @@ class SimpleRenderer : public RendererServices {
145145
size_t noutputs() const { return m_outputbufs.size(); }
146146

147147
virtual void init_shadingsys(ShadingSystem* ss) { shadingsys = ss; }
148-
virtual void export_state(RenderState&) const;
148+
virtual void export_state(RenderContext&) const;
149149
virtual void prepare_render() {}
150150
virtual void warmup() {}
151151
virtual void render(int /*xres*/, int /*yres*/) {}

src/testshade/testshade.cpp

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -945,21 +945,23 @@ setup_transformations(SimpleRenderer& rend, OSL::Matrix44& Mshad,
945945
rend.name_transform("myspace", Mmyspace);
946946
}
947947

948-
// NOTE: each host thread could end up with its own RenderState.
949-
// Starting simple with a single instance for now
950-
static RenderState theRenderState;
948+
// A single render context shared by all render threads.
949+
static RenderContext theRenderState;
951950

952951

953952
// Set up the ShaderGlobals fields for pixel (x,y).
954953
static void
955-
setup_shaderglobals(ShaderGlobals& sg, ShadingSystem* shadingsys, int x, int y)
954+
setup_shaderglobals(ShaderGlobals& sg, ShadingSystem* shadingsys,
955+
RenderState& renderState, int x, int y)
956956
{
957957
// Just zero the whole thing out to start
958958
memset((char*)&sg, 0, sizeof(ShaderGlobals));
959959

960960
// Any state data needed by SimpleRenderer or its free function equivalent
961961
// will need to be passed here the ShaderGlobals.
962-
sg.renderstate = &theRenderState;
962+
renderState.context = &theRenderState;
963+
renderState.closure_pool = nullptr; // Use inbuilt closure pool.
964+
sg.renderstate = &renderState;
963965

964966
// Set "shader" space to be Mshad. In a real renderer, this may be
965967
// different for each shader group.
@@ -1182,7 +1184,8 @@ setup_output_images(SimpleRenderer* rend, ShadingSystem* shadingsys,
11821184
ShadingContext* ctx = shadingsys->get_context(thread_info);
11831185
raytype_bit = shadingsys->raytype_bit(ustring(raytype_name));
11841186
ShaderGlobals sg;
1185-
setup_shaderglobals(sg, shadingsys, 0, 0);
1187+
RenderState renderState;
1188+
setup_shaderglobals(sg, shadingsys, renderState, 0, 0);
11861189

11871190
#if OSL_USE_BATCHED
11881191
if (batched) {
@@ -1586,6 +1589,7 @@ shade_region(SimpleRenderer* rend, ShaderGroup* shadergroup, OIIO::ROI roi,
15861589

15871590
// Set up shader globals and a little test grid of points to shade.
15881591
ShaderGlobals shaderglobals;
1592+
RenderState renderState;
15891593

15901594
raytype_bit = shadingsys->raytype_bit(ustring(raytype_name));
15911595

@@ -1606,7 +1610,7 @@ shade_region(SimpleRenderer* rend, ShaderGroup* shadergroup, OIIO::ROI roi,
16061610
// set it up rigged to look like we're rendering a single
16071611
// quadrilateral that exactly fills the viewport, and that
16081612
// setup is done in the following function call:
1609-
setup_shaderglobals(shaderglobals, shadingsys, x, y);
1613+
setup_shaderglobals(shaderglobals, shadingsys, renderState, x, y);
16101614

16111615
if (this_threads_index == uninitialized_thread_index) {
16121616
this_threads_index = next_thread_index.fetch_add(1u);

testsuite/example-cuda/cuda_grid_renderer.cu

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ shade(float3* Cout, int w, int h)
7474
// networks, so there should be (at least) some mechanism to issue a
7575
// warning or error if the closure or param storage can possibly be
7676
// exceeded.
77-
alignas(8) char closure_pool[256];
77+
StackClosurePool closure_pool;
7878
alignas(8) char params[256];
7979

8080
const float invw = 1.0 / w;
@@ -115,8 +115,11 @@ shade(float3* Cout, int w, int h)
115115
sg.flipHandedness = 0;
116116

117117
// Pack the "closure pool" into one of the ShaderGlobals pointers
118-
*(int*)&closure_pool[0] = 0;
119-
sg.renderstate = &closure_pool[0];
118+
closure_pool.reset();
119+
RenderState renderState;
120+
// TODO: renderState.context = ...
121+
renderState.closure_pool = &closure_pool;
122+
sg.renderstate = &renderState;
120123

121124
// Run the shader
122125
__osl__init(&sg, params);

0 commit comments

Comments
 (0)