Skip to content

Commit 634baa2

Browse files
author
Chris Hellmuth
committed
OSL Cache v0
1 parent df49a40 commit 634baa2

File tree

6 files changed

+120
-27
lines changed

6 files changed

+120
-27
lines changed

src/include/OSL/rendererservices.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -595,6 +595,19 @@ class OSLEXECPUBLIC RendererServices {
595595
}
596596
};
597597

598+
/// Report whether this renderer offers a cache for OptiX-compiled shader
/// groups. The default implementation returns false. A renderer that
/// returns true here is expected to also override optix_cache_insert()
/// and optix_cache_get().
virtual bool optix_cache_enabled() const { return false; }
599+
600+
virtual void optix_cache_insert(const std::string& key,
601+
const std::string& value) const
602+
{
603+
}
604+
605+
virtual bool optix_cache_get(const std::string& key,
606+
std::string& value) const
607+
{
608+
return false;
609+
}
610+
598611
/// A renderer may choose to support batched execution by providing pointers
599612
/// to objects satisfying the BatchedRendererServices<WidthOf<#>> interface
600613
/// for specific batch sizes.

src/liboslexec/instance.cpp

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -848,13 +848,31 @@ ShaderGroup::setup_interactive_arena(cspan<uint8_t> paramblock)
848848

849849

850850

851+
std::string
852+
ShaderGroup::optix_cache_key() const
853+
{
854+
const uint64_t ir_key = Strutil::strhash(serialize_internal());
855+
856+
std::string safegroup;
857+
safegroup = Strutil::replace(name(), "/", "_", true);
858+
safegroup = Strutil::replace(safegroup, ":", "_", true);
859+
return fmtformat("cache-osl-{}-{}", safegroup, ir_key);
860+
}
861+
851862
std::string
852863
ShaderGroup::serialize() const
864+
{
865+
lock_guard lock(m_mutex);
866+
return serialize_internal();
867+
}
868+
869+
std::string
870+
ShaderGroup::serialize_internal() const
853871
{
854872
std::ostringstream out;
855873
out.imbue(std::locale::classic()); // force C locale
856874
out.precision(9);
857-
lock_guard lock(m_mutex);
875+
858876
for (int i = 0, nl = nlayers(); i < nl; ++i) {
859877
const ShaderInstance* inst = m_layers[i].get();
860878

src/liboslexec/llvm_instance.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2150,6 +2150,7 @@ BackendLLVM::run()
21502150
ll.prune_and_internalize_module(external_functions);
21512151
}
21522152

2153+
21532154
// Debug code to dump the pre-optimized bitcode to a file
21542155
if (llvm_debug() >= 2 || shadingsys().llvm_output_bitcode()) {
21552156
// Make a safe group name that doesn't have "/" in it! Also beware
@@ -2281,6 +2282,13 @@ BackendLLVM::run()
22812282
group().llvm_compiled_layer(nlayers - 1));
22822283
}
22832284

2285+
if (use_optix() && renderer()->optix_cache_enabled()) {
2286+
std::string cache_key = group().hash_key();
2287+
renderer()->optix_cache_insert(
2288+
cache_key, optix_cache_wrap(group().m_llvm_ptx_compiled_version,
2289+
group().llvm_groupdata_size()));
2290+
}
2291+
22842292
// We are destroying the entire module below,
22852293
// no reason to bother destroying individual functions
22862294
#if 0

src/liboslexec/oslexec.cpp

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,28 @@ shadertype_from_name(string_view name)
5050
return ShaderType::Unknown;
5151
}
5252

53+
std::string
54+
optix_cache_wrap(const std::string& ptx, size_t groupdata_size)
55+
{
56+
// Cache string is the ptx file with groupdata size on top as a comment.
57+
// This way the cache string is a valid ptx program, which can be useful
58+
// for debugging.
59+
return fmtformat("// {}\n{}", groupdata_size, ptx);
60+
}
61+
62+
/// Split a cache string of the form produced by optix_cache_wrap() back
/// into its PTX text and groupdata size.
///
/// The expected layout is a first line "// <decimal size>" terminated by a
/// newline, followed by the PTX text. Any characters between the digits and
/// that newline are ignored.
///
/// \param cache_value     The cached string to decode.
/// \param ptx             Receives everything after the first newline.
/// \param groupdata_size  Receives the size parsed from the header line.
///
/// If `cache_value` is malformed (no newline, missing "// " prefix, no
/// digits after the prefix, or a size too large for size_t), both output
/// parameters are left untouched. This function never throws on malformed
/// input, so a corrupted cache entry cannot abort the caller.
void
optix_cache_unwrap(const std::string& cache_value, std::string& ptx,
                   size_t& groupdata_size)
{
    static const char header_prefix[] = "// ";
    const size_t prefix_len           = sizeof(header_prefix) - 1;

    const size_t header_end = cache_value.find('\n');
    if (header_end == std::string::npos)
        return;  // no header line at all

    // The header must be long enough to hold the prefix plus at least one
    // digit, and must actually start with the prefix.
    if (header_end <= prefix_len
        || cache_value.compare(0, prefix_len, header_prefix) != 0)
        return;

    // Parse the decimal size by hand: no exceptions, no locale dependence,
    // and explicit overflow detection.
    const size_t max_size = static_cast<size_t>(-1);
    size_t parsed         = 0;
    size_t pos            = prefix_len;
    for (; pos < header_end; ++pos) {
        const char c = cache_value[pos];
        if (c < '0' || c > '9')
            break;
        const size_t digit = static_cast<size_t>(c - '0');
        if (parsed > (max_size - digit) / 10)
            return;  // value would not fit in size_t
        parsed = parsed * 10 + digit;
    }
    if (pos == prefix_len)
        return;  // prefix was not followed by any digit

    // Only commit the results once the whole header has been validated.
    groupdata_size = parsed;
    ptx            = cache_value.substr(header_end + 1);
}
5375

5476
}; // namespace pvt
5577
OSL_NAMESPACE_EXIT

src/liboslexec/oslexec_pvt.h

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,12 @@ struct PerThreadInfo {
7878

7979
namespace pvt {
8080

81+
/// Encode PTX text together with its groupdata size as one cache string.
std::string
optix_cache_wrap(const std::string& ptx, size_t groupdata_size);

/// Decode a cache string built by optix_cache_wrap() into `ptx` and
/// `groupdata_size`.
void
optix_cache_unwrap(const std::string& cache_value, std::string& ptx,
                   size_t& groupdata_size);
86+
8187
// forward definitions
8288
class ShadingSystemImpl;
8389
class ShaderInstance;
@@ -1735,6 +1741,9 @@ class ShaderGroup {
17351741
/// Return a reference to the shading system for this group.
17361742
ShadingSystemImpl& shadingsys() const { return m_shadingsys; }
17371743

1744+
/// Return a copy of the key string stored in m_hash_key.
std::string hash_key() const { return m_hash_key; }
1745+
/// Store `hash_key` in m_hash_key, replacing any previous value.
void hash_key(std::string hash_key) { m_hash_key = hash_key; }
1746+
17381747
int optimized() const { return m_optimized; }
17391748
void optimized(int opt) { m_optimized = opt; }
17401749

@@ -1829,6 +1838,8 @@ class ShaderGroup {
18291838
void name(ustring name) { m_name = name; }
18301839
ustring name() const { return m_name; }
18311840

1841+
std::string optix_cache_key() const;
1842+
18321843
std::string serialize() const;
18331844

18341845
void lock() const { m_mutex.lock(); }
@@ -1965,6 +1976,8 @@ class ShaderGroup {
19651976
}
19661977

19671978
private:
1979+
std::string serialize_internal() const;
1980+
19681981
// Put all the things that are read-only (after optimization) and
19691982
// needed on every shade execution at the front of the struct, as much
19701983
// together on one cache line as possible.
@@ -2016,6 +2029,8 @@ class ShaderGroup {
20162029
atomic_ll m_executions { 0 }; ///< Number of times the group executed
20172030
atomic_ll m_stat_total_shading_time_ticks { 0 }; // Shading time (ticks)
20182031

2032+
std::string m_hash_key;
2033+
20192034
// PTX assembly for compiled ShaderGroup
20202035
std::string m_llvm_ptx_compiled_version;
20212036

src/liboslexec/shadingsys.cpp

Lines changed: 43 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -3774,6 +3774,7 @@ ShadingSystemImpl::optimize_group(ShaderGroup& group, ShadingContext* ctx,
37743774
ctx = get_context(thread_info);
37753775
ctx_allocated = true;
37763776
}
3777+
37773778
if (!group.optimized()) {
37783779
RuntimeOptimizer rop(*this, group, ctx);
37793780
rop.run();
@@ -3823,34 +3824,50 @@ ShadingSystemImpl::optimize_group(ShaderGroup& group, ShadingContext* ctx,
38233824
}
38243825

38253826
if (need_jit) {
3826-
BackendLLVM lljitter(*this, group, ctx);
3827-
lljitter.run();
3828-
3829-
// NOTE: it is now possible to optimize and not JIT
3830-
// which would leave the cleanup to happen
3831-
// when the ShadingSystem is destroyed
3832-
3833-
// Only cleanup when are not batching or if
3834-
// the batch jit has already happened,
3835-
// as it requires the ops so we can't delete them yet!
3836-
if (((renderer()->batched(WidthOf<16>()) == nullptr)
3837-
&& (renderer()->batched(WidthOf<8>()) == nullptr)
3838-
&& (renderer()->batched(WidthOf<4>()) == nullptr))
3839-
|| group.batch_jitted()) {
3840-
group_post_jit_cleanup(group);
3827+
bool cached = false;
3828+
if (use_optix() && renderer()->optix_cache_enabled()) {
3829+
std::string cache_key = group.optix_cache_key();
3830+
group.hash_key(cache_key);
3831+
3832+
std::string cache_value;
3833+
if (renderer()->optix_cache_get(cache_key, cache_value)) {
3834+
cached = true;
3835+
optix_cache_unwrap(cache_value,
3836+
group.m_llvm_ptx_compiled_version,
3837+
group.m_llvm_groupdata_size);
3838+
}
38413839
}
38423840

3843-
group.m_jitted = true;
3844-
spin_lock stat_lock(m_stat_mutex);
3845-
m_stat_opt_locking_time += locking_time;
3846-
m_stat_optimization_time += timer();
3847-
m_stat_total_llvm_time += lljitter.m_stat_total_llvm_time;
3848-
m_stat_llvm_setup_time += lljitter.m_stat_llvm_setup_time;
3849-
m_stat_llvm_irgen_time += lljitter.m_stat_llvm_irgen_time;
3850-
m_stat_llvm_opt_time += lljitter.m_stat_llvm_opt_time;
3851-
m_stat_llvm_jit_time += lljitter.m_stat_llvm_jit_time;
3852-
m_stat_max_llvm_local_mem = std::max(m_stat_max_llvm_local_mem,
3853-
lljitter.m_llvm_local_mem);
3841+
if (!cached) {
3842+
BackendLLVM lljitter(*this, group, ctx);
3843+
lljitter.run();
3844+
3845+
// NOTE: it is now possible to optimize and not JIT
3846+
// which would leave the cleanup to happen
3847+
// when the ShadingSystem is destroyed
3848+
3849+
// Only cleanup when are not batching or if
3850+
// the batch jit has already happened,
3851+
// as it requires the ops so we can't delete them yet!
3852+
if (((renderer()->batched(WidthOf<16>()) == nullptr)
3853+
&& (renderer()->batched(WidthOf<8>()) == nullptr)
3854+
&& (renderer()->batched(WidthOf<4>()) == nullptr))
3855+
|| group.batch_jitted()) {
3856+
group_post_jit_cleanup(group);
3857+
}
3858+
3859+
group.m_jitted = true;
3860+
spin_lock stat_lock(m_stat_mutex);
3861+
m_stat_opt_locking_time += locking_time;
3862+
m_stat_optimization_time += timer();
3863+
m_stat_total_llvm_time += lljitter.m_stat_total_llvm_time;
3864+
m_stat_llvm_setup_time += lljitter.m_stat_llvm_setup_time;
3865+
m_stat_llvm_irgen_time += lljitter.m_stat_llvm_irgen_time;
3866+
m_stat_llvm_opt_time += lljitter.m_stat_llvm_opt_time;
3867+
m_stat_llvm_jit_time += lljitter.m_stat_llvm_jit_time;
3868+
m_stat_max_llvm_local_mem = std::max(m_stat_max_llvm_local_mem,
3869+
lljitter.m_llvm_local_mem);
3870+
}
38543871
}
38553872

38563873
if (ctx_allocated) {

0 commit comments

Comments
 (0)