Use custom shaders to work around the FLOAT limitation of GPUTexture

gamingrobot · gamingrobot · commit 80484b549c69 · 2025-07-07T01:09:34.000-05:00
diff --git a/BlenderMalt/MaltPipeline.py b/BlenderMalt/MaltPipeline.py
@@ -1,6 +1,5 @@
 import os, platform, time
 import bpy
-import gpu
 from BlenderMalt.MaltUtils import malt_path_getter, malt_path_setter
 from . import MaltMaterial, MaltMeshes, MaltTextures
 
@@ -76,13 +75,8 @@ def update_pipeline(self, context):
         docs_path = docs_path if os.path.exists(docs_path) else None
         
         path = bpy.path.abspath(pipeline, library=self.id_data.library)
-
-        viewport_bit_depth = int(self.viewport_bit_depth)
-        if gpu.platform.backend_type_get() != 'OPENGL':
-            viewport_bit_depth = 32 # force 32 bit depth since gpu module only supports FLOAT data_format on GPUTexture
-
         import Bridge
-        bridge = Bridge.Client_API.Bridge(path, viewport_bit_depth, debug_mode, renderdoc_path, plugin_dirs, docs_path)
+        bridge = Bridge.Client_API.Bridge(path, int(self.viewport_bit_depth), debug_mode, renderdoc_path, plugin_dirs, docs_path)
         from Malt.Utils import LOG
         LOG.info('Blender {} {} {}'.format(bpy.app.version_string, bpy.app.build_branch, bpy.app.build_hash))
         params = bridge.get_parameters()
diff --git a/BlenderMalt/MaltRenderEngine.py b/BlenderMalt/MaltRenderEngine.py
@@ -359,6 +359,7 @@ def view_draw(self, context, depsgraph):
             if region.type == 'UI':
                 region.tag_redraw()
 
+        global DISPLAY_DRAW
         if self.render_backend == 'OPENGL':
             fbo = GL.gl_buffer(GL.GL_INT, 1)
             GL.glGetIntegerv(GL.GL_FRAMEBUFFER_BINDING, fbo)
@@ -382,24 +383,33 @@ def view_draw(self, context, depsgraph):
                 render_texture = Texture(resolution, GL.GL_RGBA8, GL.GL_UNSIGNED_BYTE, pixels.buffer(),
                     mag_filter=mag_filter)
             
-            global DISPLAY_DRAW
             if DISPLAY_DRAW is None:
-                DISPLAY_DRAW = DisplayDraw()
+                DISPLAY_DRAW = DisplayDrawGL()
             DISPLAY_DRAW.draw(fbo, render_texture)
         else:
             import gpu
-            from gpu_extras.presets import draw_texture_2d
-            data_format = 'FLOAT' # GPUTexture only supports 'FLOAT' buffer types
+            data_size = len(pixels)
+            w,h = resolution
+            if self.bridge.viewport_bit_depth == 8:
+                data_size = data_size // 4
+                h = h // 4
+            elif self.bridge.viewport_bit_depth == 16:
+                data_size = data_size // 2
+                h = h // 2
+            data_format = 'FLOAT' #Pretend we are uploading float data, since it's the only supported format.
             texture_format = 'RGBA32F'
-            #TODO do we need the sRGBConversion shader?
-            buffer = gpu.types.Buffer(data_format, len(pixels), pixels.buffer())
-            render_texture = gpu.types.GPUTexture(viewport_resolution, format=texture_format, data=buffer)
-            draw_texture_2d(render_texture, (0, 0), render_texture.width, render_texture.height)
+            data_as_float = (ctypes.c_float * data_size).from_address(pixels._buffer.data)
+            buffer = gpu.types.Buffer(data_format, data_size, data_as_float)
+            render_texture = gpu.types.GPUTexture((w, h), format=texture_format, data=buffer)
+
+            if DISPLAY_DRAW is None:
+                DISPLAY_DRAW = DisplayDrawGPU()
+            DISPLAY_DRAW.draw(self.bridge.viewport_bit_depth, resolution, render_texture)
 
 
 DISPLAY_DRAW = None
 
-class DisplayDraw():
+class DisplayDrawGL():
     def __init__(self):
         positions=[
              1.0,  1.0, 1.0,
@@ -425,6 +435,108 @@ def draw(self, fbo, texture):
         self.shader.bind()
         self.quad.draw()
 
+class DisplayDrawGPU():    
+    def __init__(self):
+        import gpu
+        from gpu_extras.batch import batch_for_shader
+
+        vertex_src = """
+        void main()
+        {
+            IO_POSITION = IN_POSITION * vec3(1000, 1000, 0.5);
+            gl_Position = vec4(IO_POSITION, 1);
+        }
+        """
+
+        pixel_src = """
+        vec3 srgb_to_linear(vec3 srgb)
+        {
+            vec3 low = srgb / 12.92;
+            vec3 high = pow((srgb + 0.055)/1.055, vec3(2.4));
+            return mix(low, high, greaterThan(srgb, vec3(0.04045)));
+        }
+
+        void main()
+        {
+            vec2 uv =  IO_POSITION.xy * 0.5 + 0.5;
+
+            int divisor = 32 / bit_depth;
+
+            ivec2 output_texel = ivec2(vec2(output_res) * uv);
+            int output_texel_linear = output_texel.y * output_res.x + output_texel.x;
+            
+            int texel_linear_read = output_texel_linear / divisor;
+            ivec2 texel_read = ivec2(texel_linear_read % output_res.x, texel_linear_read / output_res.x);
+            int sub_texel_index = output_texel_linear % divisor;
+
+            vec4 texel_value = texelFetch(input_texture, texel_read, 0);
+
+            if(bit_depth == 32)
+            {
+                OUT_COLOR = texel_value;
+            }
+            else if(bit_depth == 16)
+            {
+                vec2 sub_texel_value = sub_texel_index == 0 ? texel_value.xy : texel_value.zw;
+
+                uint packed_xy = floatBitsToUint(sub_texel_value.x);
+                uint packed_yz = floatBitsToUint(sub_texel_value.y);
+
+                OUT_COLOR.rg = unpackHalf2x16(packed_xy);
+                OUT_COLOR.ba = unpackHalf2x16(packed_yz);
+            }
+            else if(bit_depth == 8)
+            {
+                float sub_texel_value = texel_value[sub_texel_index];
+                uint packed_value = floatBitsToUint(sub_texel_value);
+                OUT_COLOR = unpackUnorm4x8(packed_value);
+                OUT_COLOR.rgb = srgb_to_linear(OUT_COLOR.rgb);
+            }
+            else{
+                OUT_COLOR = vec4(1,1,0,1);
+            }
+        }
+        """
+
+        self.iface = gpu.types.GPUStageInterfaceInfo("IFace")
+        self.iface.smooth('VEC3', "IO_POSITION")
+        
+        self.sh_info = gpu.types.GPUShaderCreateInfo()
+        self.sh_info.push_constant('INT', "bit_depth")
+        self.sh_info.push_constant('IVEC2', "output_res")
+        self.sh_info.sampler(0, 'FLOAT_2D', "input_texture")
+        self.sh_info.vertex_source(vertex_src)
+        self.sh_info.vertex_in(0, 'VEC3', "IN_POSITION")
+        self.sh_info.vertex_out(self.iface)
+        self.sh_info.fragment_source(pixel_src)
+        self.sh_info.fragment_out(0, 'VEC4', "OUT_COLOR")
+
+        self.shader = gpu.shader.create_from_info(self.sh_info)
+
+        positions=[
+            ( 1.0,  1.0, 1.0),
+            ( 1.0, -1.0, 1.0),
+            (-1.0, -1.0, 1.0),
+            (-1.0,  1.0, 1.0),
+        ]
+        indices=[
+            (0, 1, 3),
+            (1, 2, 3),
+        ]
+        
+        self.quad = batch_for_shader(
+            self.shader, 'TRIS',
+            {"IN_POSITION": positions},
+            indices=indices
+        )
+
+    def draw(self, bit_depth, resolution, texture):
+        self.shader.bind()
+        self.shader.uniform_int("bit_depth", bit_depth)
+        self.shader.uniform_int("output_res", resolution)
+        self.shader.uniform_sampler("input_texture", texture)
+        self.quad.draw(self.shader)
+
 
 class OT_MaltRenderDocCapture(bpy.types.Operator):
     bl_idname = "wm.malt_renderdoc_capture"