@@ -79,6 +79,7 @@ static void process_GOOGLE_cpp_style_line_directive(OpenGLContext const& context
// Forward declarations of the file-local (static) shader-source helpers used further down.
7979static void process_OVR_multiview2 (OpenGLContext const & context, int32_t eyeCount, char * source,
8080 size_t len) noexcept ;
// Shader source to prepend for ARB_shading_language_packing support, if the context needs it.
8181static std::string_view process_ARB_shading_language_packing (OpenGLContext& context) noexcept ;
// Shader source defining a bitCount() fallback; empty when the context reports at least
// GL 4.0 or GLES 3.1.
82+ static std::string_view process_countBits (OpenGLContext& context) noexcept ;
// Splits a shader source string into three views.
8283static std::array<std::string_view, 3 > splitShaderSource (std::string_view source);
8384
8485// ------------------------------------------------------------------------------------------------
@@ -779,6 +780,7 @@ void ShaderCompilerService::cancelPendingSynchronousProgram(program_token_t cons
779780
780781 // add support for ARB_shading_language_packing if needed
781782 auto const packingFunctions = process_ARB_shading_language_packing (context);
783+ auto const countBitsFunctions = process_countBits (context);
782784
783785 // split shader source, so we can insert the specialization constants and the packing
784786 // functions
@@ -789,10 +791,10 @@ void ShaderCompilerService::cancelPendingSynchronousProgram(program_token_t cons
789791 version = " #version 310 es\n " ;
790792 }
791793
792- std::array<std::string_view, 5 > sources = {
794+ std::array<std::string_view, 6 > sources = {
793795 version, prolog,
794796 { specializationConstantString.data (), specializationConstantString.size () },
795- packingFunctions,
797+ packingFunctions, countBitsFunctions,
796798 { body.data (), body.size () - 1 } // null-terminated
797799 };
798800
@@ -803,8 +805,8 @@ void ShaderCompilerService::cancelPendingSynchronousProgram(program_token_t cons
803805 [](std::string_view s) { return !s.empty (); });
804806 size_t const count = std::distance (sources.begin (), partitionPoint);
805807
806- std::array<const char *, 5 > shaderStrings;
807- std::array<GLint, 5 > lengths;
808+ std::array<const char *, 6 > shaderStrings;
809+ std::array<GLint, 6 > lengths;
808810 for (size_t j = 0 ; j < count; j++) {
809811 shaderStrings[j] = sources[j].data ();
810812 lengths[j] = GLint (sources[j].size ());
@@ -1085,6 +1087,22 @@ UTILS_NOINLINE
10851087 }
10861088}
10871089
1090+ /* static */ std::string_view process_countBits (OpenGLContext& context) noexcept {
1091+ using namespace std ::literals;
1092+ if (context.isAtLeastGL <4 , 0 >() || context.isAtLeastGLES <3 , 1 >()) {
1093+ return " " sv;
1094+ }
1095+ return R"(
1096+ // https://graphics.stanford.edu/%7Eseander/bithacks.html
1097+ int bitCount(highp uint value) {
1098+ value = value - ((value >> 1u) & 0x55555555u);
1099+ value = (value & 0x33333333u) + ((value >> 2u) & 0x33333333u);
1100+ return int(((value + (value >> 4u) & 0xF0F0F0Fu) * 0x1010101u) >> 24u);
1101+ }
1102+ )" sv;
1103+ }
1104+
1105+
10881106// Tragically, OpenGL 4.1 doesn't support unpackHalf2x16 (appeared in 4.2) and
10891107// macOS doesn't support GL_ARB_shading_language_packing
10901108// Also GLES3.0 didn't have the full set of packing/unpacking functions
0 commit comments