Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions src/include/OSL/rendererservices.h
Original file line number Diff line number Diff line change
Expand Up @@ -595,6 +595,19 @@ class OSLEXECPUBLIC RendererServices {
}
};

/// Report whether this renderer supplies an external cache for OptiX
/// compilation results. The base implementation always answers false, so
/// a renderer must override this to opt in.
virtual bool optix_cache_enabled() const
{
    return false;
}

/// Hand the renderer a `value` to be stored in its cache under `key`.
/// The base implementation is a no-op: nothing is stored.
virtual void optix_cache_insert(const std::string& key,
                                const std::string& value) const
{
    // Intentionally empty -- the default renderer has no cache.
}

/// Ask the renderer's cache for the entry stored under `key`.
/// Returns true on a hit (presumably with `value` filled in by the
/// override -- confirm against the renderer implementation). The base
/// implementation never touches `value` and always reports a miss.
virtual bool optix_cache_get(const std::string& key,
                             std::string& value) const
{
    const bool found = false;  // no cache in the default renderer
    return found;
}
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The external cache API. Should the names include "optix" or be generic?


/// A renderer may choose to support batched execution by providing pointers
/// to objects satisfying the BatchedRendererServices<WidthOf<#>> interface
/// for specific batch sizes.
Expand Down
24 changes: 23 additions & 1 deletion src/liboslexec/instance.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -848,13 +848,35 @@ ShaderGroup::setup_interactive_arena(cspan<uint8_t> paramblock)



std::string
ShaderGroup::generate_optix_cache_key()
{
const uint64_t ir_key = Strutil::strhash(serialize_internal());

std::string safegroup;
safegroup = Strutil::replace(name(), "/", "_", true);
safegroup = Strutil::replace(safegroup, ":", "_", true);

std::string cache_key = fmtformat("cache-osl-{}-{}", safegroup, ir_key);

m_optix_cache_key = cache_key;
return m_optix_cache_key;
}

/// Return the serialized form of this group. Holds m_mutex for the whole
/// call and delegates the actual work to serialize_internal().
std::string
ShaderGroup::serialize() const
{
    lock_guard guard(m_mutex);
    std::string serialized = serialize_internal();
    return serialized;
}

std::string
ShaderGroup::serialize_internal() const
{
std::ostringstream out;
out.imbue(std::locale::classic()); // force C locale
out.precision(9);
lock_guard lock(m_mutex);
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This lock caused me some trouble. ShadingSystemImpl::optimize_group grabs the same lock, and then calls serialize, so we deadlock without a change.

This is a quick fix; a recursive mutex would be another option. I don't understand the locking scheme well enough to know which is the right call here.


for (int i = 0, nl = nlayers(); i < nl; ++i) {
const ShaderInstance* inst = m_layers[i].get();

Expand Down
8 changes: 8 additions & 0 deletions src/liboslexec/llvm_instance.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2150,6 +2150,7 @@ BackendLLVM::run()
ll.prune_and_internalize_module(external_functions);
}


// Debug code to dump the pre-optimized bitcode to a file
if (llvm_debug() >= 2 || shadingsys().llvm_output_bitcode()) {
// Make a safe group name that doesn't have "/" in it! Also beware
Expand Down Expand Up @@ -2281,6 +2282,13 @@ BackendLLVM::run()
group().llvm_compiled_layer(nlayers - 1));
}

if (use_optix() && renderer()->optix_cache_enabled()) {
std::string cache_key = group().optix_cache_key();
renderer()->optix_cache_insert(
cache_key, optix_cache_wrap(group().m_llvm_ptx_compiled_version,
group().llvm_groupdata_size()));
}

// We are destroying the entire module below,
// no reason to bother destroying individual functions
#if 0
Expand Down
22 changes: 22 additions & 0 deletions src/liboslexec/oslexec.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,28 @@ shadertype_from_name(string_view name)
return ShaderType::Unknown;
}

/// Produce the string stored in the OptiX cache for a compiled group:
/// a first line "// <groupdata_size>" followed by the PTX text.
std::string
optix_cache_wrap(const std::string& ptx, size_t groupdata_size)
{
    // Encoding the groupdata size as a leading comment keeps the cached
    // string a valid ptx program, which is handy when debugging.
    std::string wrapped = fmtformat("// {}\n{}", groupdata_size, ptx);
    return wrapped;
}

/// Inverse of optix_cache_wrap(): split a cached string into the groupdata
/// size and the PTX text.
///
/// The expected layout is a first line of the form "// <decimal digits>"
/// terminated by '\n', followed by the PTX.
///
/// @param cache_value     The string previously produced by optix_cache_wrap.
/// @param ptx             Receives everything after the first newline.
/// @param groupdata_size  Receives the number parsed from the first line.
///
/// If cache_value is malformed (no newline, missing "// " prefix, empty or
/// non-numeric size, or a size that does not fit in size_t), both outputs
/// are left unmodified. This function never throws on bad input, so a
/// corrupted cache entry cannot take down the caller.
void
optix_cache_unwrap(const std::string& cache_value, std::string& ptx,
                   size_t& groupdata_size)
{
    static const char header_prefix[] = "// ";
    const size_t prefix_len           = sizeof(header_prefix) - 1;

    // The header must be "// " plus at least one digit before the newline.
    const size_t header_end = cache_value.find('\n');
    if (header_end == std::string::npos || header_end <= prefix_len
        || cache_value.compare(0, prefix_len, header_prefix) != 0)
        return;

    // Parse the digits by hand: std::stoll would throw on garbage and would
    // silently accept negative values that wrap when stored in a size_t.
    const size_t max_size = static_cast<size_t>(-1);
    size_t parsed_size    = 0;
    for (size_t i = prefix_len; i < header_end; ++i) {
        const char c = cache_value[i];
        if (c < '0' || c > '9')
            return;
        const size_t digit = static_cast<size_t>(c - '0');
        if (parsed_size > (max_size - digit) / 10)
            return;  // would overflow size_t
        parsed_size = parsed_size * 10 + digit;
    }

    // Only commit to the outputs once the whole header has been validated.
    groupdata_size = parsed_size;
    ptx            = cache_value.substr(header_end + 1);
}

}; // namespace pvt
OSL_NAMESPACE_EXIT
13 changes: 13 additions & 0 deletions src/liboslexec/oslexec_pvt.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,12 @@ struct PerThreadInfo {

namespace pvt {

/// Build the string stored in the OptiX cache: the groupdata size written
/// as a leading "// <size>" comment line, followed by the PTX text.
std::string
optix_cache_wrap(const std::string& ptx, size_t groupdata_size);

/// Inverse of optix_cache_wrap(): split a cached string into the groupdata
/// size (parsed from the first line) and the PTX text that follows it.
/// If cache_value contains no newline, ptx and groupdata_size are left
/// unmodified.
void
optix_cache_unwrap(const std::string& cache_value, std::string& ptx,
                   size_t& groupdata_size);
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wasn't sure where to put these functions. They should be paired up, but one is used in llvm_instance and the other in shadingsys.


// forward definitions
class ShadingSystemImpl;
class ShaderInstance;
Expand Down Expand Up @@ -1829,6 +1835,9 @@ class ShaderGroup {
void name(ustring name) { m_name = name; }
ustring name() const { return m_name; }

/// Compute the OptiX cache key for this group, store it in
/// m_optix_cache_key, and return it.
std::string generate_optix_cache_key();

/// Return a copy of the stored cache key (m_optix_cache_key), which
/// generate_optix_cache_key() sets.
std::string optix_cache_key() const
{
    return m_optix_cache_key;
}

std::string serialize() const;

void lock() const { m_mutex.lock(); }
Expand Down Expand Up @@ -1965,6 +1974,8 @@ class ShaderGroup {
}

private:
std::string serialize_internal() const;

// Put all the things that are read-only (after optimization) and
// needed on every shade execution at the front of the struct, as much
// together on one cache line as possible.
Expand Down Expand Up @@ -2016,6 +2027,8 @@ class ShaderGroup {
atomic_ll m_executions { 0 }; ///< Number of times the group executed
atomic_ll m_stat_total_shading_time_ticks { 0 }; // Shading time (ticks)

std::string m_optix_cache_key;

// PTX assembly for compiled ShaderGroup
std::string m_llvm_ptx_compiled_version;

Expand Down
68 changes: 42 additions & 26 deletions src/liboslexec/shadingsys.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3774,6 +3774,7 @@ ShadingSystemImpl::optimize_group(ShaderGroup& group, ShadingContext* ctx,
ctx = get_context(thread_info);
ctx_allocated = true;
}

if (!group.optimized()) {
RuntimeOptimizer rop(*this, group, ctx);
rop.run();
Expand Down Expand Up @@ -3823,34 +3824,49 @@ ShadingSystemImpl::optimize_group(ShaderGroup& group, ShadingContext* ctx,
}

if (need_jit) {
BackendLLVM lljitter(*this, group, ctx);
lljitter.run();

// NOTE: it is now possible to optimize and not JIT
// which would leave the cleanup to happen
// when the ShadingSystem is destroyed

// Only cleanup when are not batching or if
// the batch jit has already happened,
// as it requires the ops so we can't delete them yet!
if (((renderer()->batched(WidthOf<16>()) == nullptr)
&& (renderer()->batched(WidthOf<8>()) == nullptr)
&& (renderer()->batched(WidthOf<4>()) == nullptr))
|| group.batch_jitted()) {
group_post_jit_cleanup(group);
bool cached = false;
if (use_optix() && renderer()->optix_cache_enabled()) {
std::string cache_key = group.generate_optix_cache_key();

std::string cache_value;
if (renderer()->optix_cache_get(cache_key, cache_value)) {
cached = true;
optix_cache_unwrap(cache_value,
group.m_llvm_ptx_compiled_version,
group.m_llvm_groupdata_size);
}
}

group.m_jitted = true;
spin_lock stat_lock(m_stat_mutex);
m_stat_opt_locking_time += locking_time;
m_stat_optimization_time += timer();
m_stat_total_llvm_time += lljitter.m_stat_total_llvm_time;
m_stat_llvm_setup_time += lljitter.m_stat_llvm_setup_time;
m_stat_llvm_irgen_time += lljitter.m_stat_llvm_irgen_time;
m_stat_llvm_opt_time += lljitter.m_stat_llvm_opt_time;
m_stat_llvm_jit_time += lljitter.m_stat_llvm_jit_time;
m_stat_max_llvm_local_mem = std::max(m_stat_max_llvm_local_mem,
lljitter.m_llvm_local_mem);
if (!cached) {
BackendLLVM lljitter(*this, group, ctx);
lljitter.run();

// NOTE: it is now possible to optimize and not JIT
// which would leave the cleanup to happen
// when the ShadingSystem is destroyed

// Only cleanup when are not batching or if
// the batch jit has already happened,
// as it requires the ops so we can't delete them yet!
if (((renderer()->batched(WidthOf<16>()) == nullptr)
&& (renderer()->batched(WidthOf<8>()) == nullptr)
&& (renderer()->batched(WidthOf<4>()) == nullptr))
|| group.batch_jitted()) {
group_post_jit_cleanup(group);
}

group.m_jitted = true;
spin_lock stat_lock(m_stat_mutex);
m_stat_opt_locking_time += locking_time;
m_stat_optimization_time += timer();
m_stat_total_llvm_time += lljitter.m_stat_total_llvm_time;
m_stat_llvm_setup_time += lljitter.m_stat_llvm_setup_time;
m_stat_llvm_irgen_time += lljitter.m_stat_llvm_irgen_time;
m_stat_llvm_opt_time += lljitter.m_stat_llvm_opt_time;
m_stat_llvm_jit_time += lljitter.m_stat_llvm_jit_time;
m_stat_max_llvm_local_mem = std::max(m_stat_max_llvm_local_mem,
lljitter.m_llvm_local_mem);
}
}

if (ctx_allocated) {
Expand Down
Loading