Upgraded to wgpu 29 and switched to front-to-back blending

KeKsBoTer · KeKsBoTer · commit f1223191ab82 · 2026-03-27T11:32:06.000+01:00
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
@@ -8,13 +8,13 @@ description = "3D Gaussian Splatting Viewer"
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 
 [dependencies]
-wgpu = { version = "27.0" }
+wgpu = { version = "29.0.1" }
 winit = "0.30.12"
 cgmath = { version = "0.18.0", features = [
     "bytemuck",
 ], git = 'https://github.com/rustgd/cgmath' }
-env_logger = "0.11.9"
-clap = { version = "4.5.60", features = ["derive"] }
+env_logger = "0.11.10"
+clap = { version = "4.6", features = ["derive"] }
 bytemuck = { version = "1.25", features = ["derive"] }
 anyhow = "1.0.102"
 ply-rs = "0.1.3"
@@ -25,15 +25,14 @@ num-traits = "0.2.19"
 half = { version = "2.7.1", features = ["bytemuck", "num-traits"] }
 log = "0.4.29"
 rayon = "1.11.0"
-image = "0.25.9"
+image = "0.25.10"
 indicatif = "0.18.4"
 
 
-egui = "0.33.3"
-egui-wgpu = "0.33.3"
-egui-winit = { version = "0.33.3", features = [], default-features = false }
-egui_plot = "0.34.1"
-egui_dnd = "0.14.0"
+egui = "0.34.0"
+egui-wgpu = "0.34.0"
+egui-winit = { version = "0.34.0", features = [], default-features = false }
+egui_plot = "0.35.0"
 
 rand = "0.10.0"
 npyz = { version = "0.8.4", features = ["npz", "half"], optional = true }
diff --git a/src/bin/measure.rs b/src/bin/measure.rs
@@ -8,8 +8,8 @@ use std::{
 };
 #[allow(unused_imports)]
 use web_splats::{
-    io, GaussianRenderer, PerspectiveCamera, PointCloud, Scene, SceneCamera, SplattingArgs, Split,
-    WGPUContext,
+    GaussianRenderer, PerspectiveCamera, PointCloud, Scene, SceneCamera, SplattingArgs, Split,
+    WGPUContext, io,
 };
 
 #[derive(Debug, Parser)]
@@ -86,10 +86,12 @@ async fn render_views(
                     load: wgpu::LoadOp::Clear(wgpu::Color::TRANSPARENT),
                     store: wgpu::StoreOp::Store,
                 },
+                depth_slice: None,
             })],
             depth_stencil_attachment: None,
             timestamp_writes: None,
             occlusion_query_set: None,
+            multiview_mask: None,
         });
         renderer.render(&mut render_pass, &pc);
     }
@@ -134,17 +136,19 @@ async fn render_views(
                             load: wgpu::LoadOp::Clear(wgpu::Color::TRANSPARENT),
                             store: wgpu::StoreOp::Store,
                         },
+                        depth_slice: None,
                     })],
                     depth_stencil_attachment: None,
                     timestamp_writes: None,
                     occlusion_query_set: None,
+                    multiview_mask: None,
                 });
                 renderer.render(&mut render_pass, &pc);
             }
             queue.submit(std::iter::once(encoder.finish()));
         }
     }
-    device.poll(wgpu::MaintainBase::Wait);
+    device.poll(wgpu::PollType::wait_indefinitely());
     let end = Instant::now();
     let duration = end - start;
     println!(
@@ -165,8 +169,9 @@ async fn main() {
     let scene_file = File::open(opt.scene).unwrap();
 
     let scene = Scene::from_json(scene_file).unwrap();
-
-    let wgpu_context = WGPUContext::new_instance().await;
+    let instance =
+        wgpu::Instance::new(wgpu::InstanceDescriptor::new_without_display_handle_from_env());
+    let wgpu_context = WGPUContext::new(&instance, None).await;
     let device = &wgpu_context.device;
     let queue = &wgpu_context.queue;
 
diff --git a/src/bin/render.rs b/src/bin/render.rs
@@ -114,10 +114,12 @@ async fn render_views(
                         load: wgpu::LoadOp::Clear(wgpu::Color::TRANSPARENT),
                         store: wgpu::StoreOp::Store,
                     },
+                    depth_slice: None,
                 })],
                 depth_stencil_attachment: None,
                 timestamp_writes: None,
                 occlusion_query_set: None,
+                multiview_mask: None,
             });
             renderer.render(&mut render_pass, &pc);
         }
@@ -142,7 +144,9 @@ async fn main() {
 
     let scene = Scene::from_json(scene_file).unwrap();
 
-    let wgpu_context = WGPUContext::new_instance().await;
+    let instance =
+        wgpu::Instance::new(wgpu::InstanceDescriptor::new_without_display_handle_from_env());
+    let wgpu_context = WGPUContext::new(&instance, None).await;
     let device = &wgpu_context.device;
     let queue = &wgpu_context.queue;
 
@@ -226,8 +230,7 @@ pub async fn download_texture(
     let sub_idx = queue.submit(std::iter::once(encoder.finish()));
 
     let mut image = {
-        let data: wgpu::BufferView<'_> =
-            download_buffer(device, &staging_buffer, Some(sub_idx)).await;
+        let data: wgpu::BufferView = download_buffer(device, &staging_buffer, Some(sub_idx)).await;
 
         ImageBuffer::<Rgba<u8>, _>::from_raw(
             bytes_per_row / texel_size,
@@ -245,19 +248,19 @@ pub async fn download_texture(
     return image::imageops::crop(&mut image, 0, 0, fb_size.width, fb_size.height).to_image();
 }
 
-async fn download_buffer<'a>(
+async fn download_buffer(
     device: &wgpu::Device,
-    buffer: &'a wgpu::Buffer,
+    buffer: &wgpu::Buffer,
     wait_idx: Option<wgpu::SubmissionIndex>,
-) -> wgpu::BufferView<'a> {
+) -> wgpu::BufferView {
     let slice = buffer.slice(..);
 
     let (tx, rx) = futures_intrusive::channel::shared::oneshot_channel();
     slice.map_async(wgpu::MapMode::Read, move |result| tx.send(result).unwrap());
     device
-        .poll(match wait_idx {
-            Some(idx) => wgpu::MaintainBase::WaitForSubmissionIndex(idx),
-            None => wgpu::MaintainBase::Wait,
+        .poll(wgpu::PollType::Wait {
+            submission_index: wait_idx,
+            timeout: None,
         })
         .unwrap();
     rx.receive().await.unwrap().unwrap();
diff --git a/src/gpu_rs.rs b/src/gpu_rs.rs
@@ -63,7 +63,7 @@ unsafe fn any_as_u8_slice<T: Sized>(p: &T) -> &[u8] {
 impl GPURSSorter {
     // The new call also needs the queue to be able to determine the maximum subgroup size (Does so by running test runs)
     pub async fn new(device: &wgpu::Device, queue: &wgpu::Queue) -> Self {
-        let sg_size = device.limits().min_subgroup_size;
+        let sg_size = device.adapter_info().subgroup_min_size;
         if sg_size == 0 || sg_size > 512 {
             let mut cur_sorter: GPURSSorter;
 
@@ -195,8 +195,8 @@ impl GPURSSorter {
         let pipeline_layout: wgpu::PipelineLayout =
             device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
                 label: Some("radix sort pipeline layout"),
-                bind_group_layouts: &[&bind_group_layout],
-                push_constant_ranges: &[],
+                bind_group_layouts: &[Some(&bind_group_layout)],
+                immediate_size: 0,
             });
 
         let raw_shader: &str = include_str!("shaders/radix_sort.wgsl");
diff --git a/src/lib.rs b/src/lib.rs
@@ -4,12 +4,12 @@ use std::{
     sync::Arc,
 };
 
+use egui_wgpu::EguiDisplayHandle;
 use renderer::Display;
 #[cfg(not(target_arch = "wasm32"))]
 use std::time::{Duration, Instant};
 #[cfg(target_arch = "wasm32")]
 use web_time::{Duration, Instant};
-use wgpu::Backends;
 
 use cgmath::{Deg, EuclideanSpace, Point3, Quaternion, UlpsEq, Vector2, Vector3};
 use egui::FullOutput;
@@ -66,15 +66,6 @@ pub struct WGPUContext {
 }
 
 impl WGPUContext {
-    pub async fn new_instance() -> Self {
-        let instance = wgpu::Instance::new(&wgpu::InstanceDescriptor {
-            backends: Backends::PRIMARY,
-            ..Default::default()
-        });
-
-        return WGPUContext::new(&instance, None).await;
-    }
-
     pub async fn new(instance: &wgpu::Instance, surface: Option<&wgpu::Surface<'static>>) -> Self {
         let adapter = wgpu::util::initialize_adapter_from_env_or_default(instance, surface)
             .await
@@ -168,7 +159,9 @@ impl WindowContext {
 
         let window = Arc::new(window);
 
-        let instance = wgpu::Instance::new(&wgpu::InstanceDescriptor::default());
+        let instance = wgpu::Instance::new(
+            wgpu::InstanceDescriptor::new_with_display_handle_from_env(window.clone_for_wgpu()),
+        );
 
         let surface: wgpu::Surface = instance.create_surface(window.clone())?;
 
@@ -399,10 +392,14 @@ impl WindowContext {
         &mut self,
         redraw_scene: bool,
         shapes: Option<FullOutput>,
-    ) -> Result<(), wgpu::SurfaceError> {
+    ) -> Result<(), wgpu::CurrentSurfaceTexture> {
         self.stopwatch.as_mut().map(|s| s.reset());
 
-        let output = self.surface.get_current_texture()?;
+        let output = match self.surface.get_current_texture() {
+            wgpu::CurrentSurfaceTexture::Success(surface_texture) => surface_texture,
+            wgpu::CurrentSurfaceTexture::Suboptimal(surface_texture) => surface_texture,
+            err => return Err(err),
+        };
         let view_rgb = output.texture.create_view(&wgpu::TextureViewDescriptor {
             format: Some(self.config.format.remove_srgb_suffix()),
             ..Default::default()
@@ -452,7 +449,7 @@ impl WindowContext {
                     view: self.display.texture(),
                     resolve_target: None,
                     ops: wgpu::Operations {
-                        load: wgpu::LoadOp::Clear(self.splatting_args.background_color),
+                        load: wgpu::LoadOp::Clear(wgpu::Color::TRANSPARENT),
                         store: wgpu::StoreOp::Store,
                     },
                     depth_slice: None,
@@ -835,9 +832,9 @@ pub async fn open_window<R: Read + Seek + Send + Sync + 'static>(
                     match state.render(request_redraw,state.ui_visible.then_some(shapes)) {
                         Ok(_) => {}
                         // Reconfigure the surface if lost
-                        Err(wgpu::SurfaceError::Lost) => state.resize(state.window.inner_size(), None),
+                        Err(wgpu::CurrentSurfaceTexture::Suboptimal(_)) => state.resize(state.window.inner_size(), None),
+                        Err(wgpu::CurrentSurfaceTexture::Lost) => state.resize(state.window.inner_size(), None),
                         // The system is out of memory, we should probably quit
-                        Err(wgpu::SurfaceError::OutOfMemory) =>target.exit(),
                         // All other errors (Outdated, Timeout) should be resolved by the next frame
                         Err(e) => println!("error: {:?}", e),
                     }
diff --git a/src/renderer.rs b/src/renderer.rs
@@ -40,10 +40,10 @@ impl GaussianRenderer {
         let pipeline_layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
             label: Some("render pipeline layout"),
             bind_group_layouts: &[
-                &PointCloud::bind_group_layout_render(device), // Needed for points_2d (on binding 2)
-                &GPURSSorter::bind_group_layout_rendering(device), // Needed for indices   (on binding 4)
+                Some(&PointCloud::bind_group_layout_render(device)), // Needed for points_2d (on binding 2)
+                Some(&GPURSSorter::bind_group_layout_rendering(device)), // Needed for indices   (on binding 4)
             ],
-            push_constant_ranges: &[],
+            immediate_size: 0,
         });
 
         let shader = device.create_shader_module(wgpu::include_wgsl!("shaders/gaussian.wgsl"));
@@ -62,7 +62,21 @@ impl GaussianRenderer {
                 entry_point: Some("fs_main"),
                 targets: &[Some(wgpu::ColorTargetState {
                     format: color_format,
-                    blend: Some(wgpu::BlendState::PREMULTIPLIED_ALPHA_BLENDING),
+                    // Front-to-back blending: dst = dst + (1 - dst.a) * src.
+                    // This gives better visuals than back-to-front blending
+                    // when used with 8-bit textures.
+                    blend: Some(wgpu::BlendState {
+                        color: wgpu::BlendComponent {
+                            src_factor: wgpu::BlendFactor::OneMinusDstAlpha,
+                            dst_factor: wgpu::BlendFactor::One,
+                            operation: wgpu::BlendOperation::Add,
+                        },
+                        alpha: wgpu::BlendComponent {
+                            src_factor: wgpu::BlendFactor::OneMinusDstAlpha,
+                            dst_factor: wgpu::BlendFactor::One,
+                            operation: wgpu::BlendOperation::Add,
+                        },
+                    }),
                     write_mask: wgpu::ColorWrites::ALL,
                 })],
                 compilation_options: Default::default(),
@@ -78,8 +92,8 @@ impl GaussianRenderer {
             },
             depth_stencil: None,
             multisample: wgpu::MultisampleState::default(),
-            multiview: None,
             cache: None,
+            multiview_mask: None,
         });
 
         let draw_indirect_buffer = device.create_buffer(&wgpu::BufferDescriptor {
@@ -349,16 +363,18 @@ impl PreprocessPipeline {
         let pipeline_layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
             label: Some("preprocess pipeline layout"),
             bind_group_layouts: &[
-                &UniformBuffer::<CameraUniform>::bind_group_layout(device),
-                &if !compressed {
+                Some(&UniformBuffer::<CameraUniform>::bind_group_layout(device)),
+                Some(&if !compressed {
                     PointCloud::bind_group_layout(device)
                 } else {
                     PointCloud::bind_group_layout_compressed(device)
-                },
-                &GPURSSorter::bind_group_layout_preprocess(device),
-                &UniformBuffer::<SplattingArgsUniform>::bind_group_layout(device),
+                }),
+                Some(&GPURSSorter::bind_group_layout_preprocess(device)),
+                Some(&UniformBuffer::<SplattingArgsUniform>::bind_group_layout(
+                    device,
+                )),
             ],
-            push_constant_ranges: &[],
+            immediate_size: 0,
         });
 
         let shader = device.create_shader_module(wgpu::ShaderModuleDescriptor {
@@ -432,11 +448,13 @@ impl Display {
         let pipeline_layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
             label: Some("display pipeline layout"),
             bind_group_layouts: &[
-                &Self::bind_group_layout(device),
-                &UniformBuffer::<CameraUniform>::bind_group_layout(device),
-                &UniformBuffer::<SplattingArgsUniform>::bind_group_layout(device),
+                Some(&Self::bind_group_layout(device)),
+                Some(&UniformBuffer::<CameraUniform>::bind_group_layout(device)),
+                Some(&UniformBuffer::<SplattingArgsUniform>::bind_group_layout(
+                    device,
+                )),
             ],
-            push_constant_ranges: &[],
+            immediate_size: 0,
         });
         let shader = device.create_shader_module(include_wgsl!("shaders/display.wgsl"));
         let pipeline = device.create_render_pipeline(&wgpu::RenderPipelineDescriptor {
@@ -464,7 +482,7 @@ impl Display {
                 })],
                 compilation_options: Default::default(),
             }),
-            multiview: None,
+            multiview_mask: None,
             cache: None,
         });
         let (view, bind_group) = Self::create_render_target(device, source_format, width, height);
diff --git a/src/shaders/preprocess.wgsl b/src/shaders/preprocess.wgsl
@@ -267,10 +267,7 @@ fn preprocess(@builtin(global_invocation_id) gid: vec3<u32>, @builtin(num_workgr
         pack2x16float(color.rg), pack2x16float(color.ba),
     );
     // filling the sorting buffers and the indirect sort dispatch buffer
-    let znear = -camera.proj[3][2] / camera.proj[2][2];
-    let zfar = -camera.proj[3][2] / (camera.proj[2][2] - (1.));
-    // filling the sorting buffers and the indirect sort dispatch buffer
-    sort_depths[store_idx] = bitcast<u32>(zfar - pos2d.z) ;//u32(f32(0xffffffu) - pos2d.z / zfar * f32(0xffffffu));
+    sort_depths[store_idx] = bitcast<u32>(pos2d.z);
     sort_indices[store_idx] = store_idx;
 
     let keys_per_wg = 256u * 15u;         // Caution: if workgroup size (256) or keys per thread (15) changes the dispatch is wrong!!
diff --git a/src/shaders/preprocess_compressed.wgsl b/src/shaders/preprocess_compressed.wgsl
@@ -318,11 +318,7 @@ fn preprocess(@builtin(global_invocation_id) gid: vec3<u32>, @builtin(num_workgr
         pack2x16float(color.rg), pack2x16float(color.ba),
     );
     
-    // filling the sorting buffers and the indirect sort dispatch buffer
-    let znear = -camera.proj[3][2] / camera.proj[2][2];
-    let zfar = -camera.proj[3][2] / (camera.proj[2][2] - (1.));
-    // filling the sorting buffers and the indirect sort dispatch buffer
-    sort_depths[store_idx] = u32(f32(0xffffffu) - (pos2d.z - znear) / (zfar - znear) * f32(0xffffffu));
+    sort_depths[store_idx] = bitcast<u32>(pos2d.z);
     sort_indices[store_idx] = store_idx;
 
     let keys_per_wg = 256u * 15u;         // Caution: if workgroup size (256) or keys per thread (15) changes the dispatch is wrong!!