@@ -63,70 +63,79 @@ unsafe fn any_as_u8_slice<T: Sized>(p: &T) -> &[u8] {
6363impl GPURSSorter {
6464 // The new call also needs the queue to be able to determine the maximum subgroup size (Does so by running test runs)
6565 pub async fn new ( device : & wgpu:: Device , queue : & wgpu:: Queue ) -> Self {
66- let mut cur_sorter: GPURSSorter ;
67-
68- log:: debug!( "Searching for the maximum subgroup size (wgpu currently does not allow to query subgroup sizes)" ) ;
69- let sizes = vec ! [ 1 , 8 , 16 , 32 ] ;
70- let mut cur_size = 2 ;
71- enum State {
72- Init ,
73- Increasing ,
74- Decreasing ,
75- }
76- let mut biggest_that_worked = 0 ;
77- let mut s = State :: Init ;
78- loop {
79- if cur_size >= sizes. len ( ) {
80- break ;
66+ let sg_size = device. limits ( ) . min_subgroup_size ;
67+ if sg_size == 0 || sg_size > 512 {
68+ let mut cur_sorter: GPURSSorter ;
69+
70+ log:: debug!( "Searching for the maximum subgroup size (wgpu currently does not allow to query subgroup sizes)" ) ;
71+ let sizes = vec ! [ 1 , 8 , 16 , 32 ] ;
72+ let mut cur_size = 2 ;
73+ enum State {
74+ Init ,
75+ Increasing ,
76+ Decreasing ,
8177 }
82- log:: debug!( "Checking sorting with subgroupsize {}" , sizes[ cur_size] ) ;
83- cur_sorter = Self :: new_with_sg_size ( device, sizes[ cur_size] ) ;
84- let sort_success = cur_sorter. test_sort ( device, queue) . await ;
85- log:: debug!( "{} worked: {}" , sizes[ cur_size] , sort_success) ;
86- match s {
87- State :: Init => {
88- if sort_success {
89- biggest_that_worked = sizes[ cur_size] ;
90- s = State :: Increasing ;
91- cur_size += 1 ;
92- } else {
93- s = State :: Decreasing ;
94- cur_size -= 1 ;
95- }
78+ let mut biggest_that_worked = 0 ;
79+ let mut s = State :: Init ;
80+ loop {
81+ if cur_size >= sizes. len ( ) {
82+ break ;
9683 }
97- State :: Increasing => {
98- if sort_success {
99- if sizes[ cur_size] > biggest_that_worked {
84+ log:: debug!( "Checking sorting with subgroupsize {}" , sizes[ cur_size] ) ;
85+ cur_sorter = Self :: new_with_sg_size ( device, sizes[ cur_size] ) ;
86+ let sort_success = cur_sorter. test_sort ( device, queue) . await ;
87+ log:: debug!( "{} worked: {}" , sizes[ cur_size] , sort_success) ;
88+ match s {
89+ State :: Init => {
90+ if sort_success {
10091 biggest_that_worked = sizes[ cur_size] ;
92+ s = State :: Increasing ;
93+ cur_size += 1 ;
94+ } else {
95+ s = State :: Decreasing ;
96+ cur_size -= 1 ;
10197 }
102- cur_size += 1 ;
103- } else {
104- break ;
10598 }
106- }
107- State :: Decreasing => {
108- if sort_success {
109- if sizes[ cur_size] > biggest_that_worked {
110- biggest_that_worked = sizes[ cur_size] ;
99+ State :: Increasing => {
100+ if sort_success {
101+ if sizes[ cur_size] > biggest_that_worked {
102+ biggest_that_worked = sizes[ cur_size] ;
103+ }
104+ cur_size += 1 ;
105+ } else {
106+ break ;
107+ }
108+ }
109+ State :: Decreasing => {
110+ if sort_success {
111+ if sizes[ cur_size] > biggest_that_worked {
112+ biggest_that_worked = sizes[ cur_size] ;
113+ }
114+ break ;
115+ } else {
116+ cur_size -= 1 ;
111117 }
112- break ;
113- } else {
114- cur_size -= 1 ;
115118 }
116119 }
117120 }
118- }
119- if biggest_that_worked == 0 {
120- panic ! (
121+ if biggest_that_worked == 0 {
122+ panic ! (
121123 "GPURSSorter::new() No workgroup size that works was found. Unable to use sorter"
122124 ) ;
125+ }
126+ cur_sorter = Self :: new_with_sg_size ( device, biggest_that_worked as u32 ) ;
127+ log:: info!(
128+ "Created a sorter with subgroup size {}\n " ,
129+ cur_sorter. subgroup_size
130+ ) ;
131+ return cur_sorter;
132+ } else {
133+ log:: info!(
134+ "Created a sorter with subgroup size {}\n " ,
135+ sg_size
136+ ) ;
137+ return Self :: new_with_sg_size ( device, sg_size) ;
123138 }
124- cur_sorter = Self :: new_with_sg_size ( device, biggest_that_worked) ;
125- log:: info!(
126- "Created a sorter with subgroup size {}\n " ,
127- cur_sorter. subgroup_size
128- ) ;
129- return cur_sorter;
130139 }
131140
132141 pub fn create_sort_stuff (
@@ -165,7 +174,7 @@ impl GPURSSorter {
165174 }
166175 }
167176
168- fn new_with_sg_size ( device : & wgpu:: Device , sg_size : i32 ) -> Self {
177+ fn new_with_sg_size ( device : & wgpu:: Device , sg_size : u32 ) -> Self {
169178 // special variables for scatter shade
170179 let histogram_sg_size: usize = sg_size as usize ;
171180 let rs_sweep_0_size: usize = RS_RADIX_SIZE / histogram_sg_size;
0 commit comments