|
4 | 4 | use std::fmt::Debug; |
5 | 5 | use std::hash::Hash; |
6 | 6 |
|
| 7 | +<<<<<<< HEAD:vortex-array/src/arrays/dict/array.rs |
7 | 8 | use vortex_buffer::BitBuffer; |
| 9 | +======= |
| 10 | +use vortex_array::stats::{ArrayStats, StatsSetRef}; |
| 11 | +use vortex_array::vtable::{ArrayVTable, NotSupported, VTable, ValidityVTable}; |
| 12 | +use vortex_array::{ |
| 13 | + Array, ArrayEq, ArrayHash, ArrayRef, EncodingId, EncodingRef, Precision, ToCanonical, vtable, |
| 14 | +}; |
| 15 | +use vortex_buffer::{BitBuffer, BitBufferMut}; |
| 16 | +>>>>>>> 541544af8 (perf[dict]: unreferenced mask Vec<bool>):encodings/dict/src/array.rs |
8 | 17 | use vortex_dtype::{DType, match_each_integer_ptype}; |
9 | 18 | use vortex_error::{VortexExpect as _, VortexResult, vortex_bail}; |
10 | 19 | use vortex_mask::{AllOr, Mask}; |
@@ -114,36 +123,33 @@ impl DictArray { |
114 | 123 | /// |
115 | 124 | /// This is useful for operations like min/max that need to ignore unreferenced values. |
116 | 125 | pub fn compute_unreferenced_values_mask(&self) -> VortexResult<BitBuffer> { |
117 | | - use vortex_buffer::BitBufferMut; |
118 | | - |
119 | 126 | let codes_validity = self.codes().validity_mask(); |
120 | 127 | let codes_primitive = self.codes().to_primitive(); |
121 | 128 | let values_len = self.values().len(); |
122 | 129 |
|
123 | | - let mut unreferenced = BitBufferMut::new_set(values_len); |
| 130 | + let mut unreferenced_vec = vec![true; values_len]; |
124 | 131 | if codes_validity.all_true() { |
125 | 132 | match_each_integer_ptype!(codes_primitive.ptype(), |P| { |
| 133 | + #[allow(clippy::cast_possible_truncation)] |
126 | 134 | for &code in codes_primitive.as_slice::<P>().iter() { |
127 | | - #[allow(clippy::cast_possible_truncation)] |
128 | | - unsafe { |
129 | | - unreferenced.unset(code as usize); |
130 | | - } |
| 135 | + unreferenced_vec[code as usize] = false; |
131 | 136 | } |
132 | 137 | }); |
133 | | - return Ok(unreferenced.freeze()); |
| 138 | + } else { |
| 139 | + match_each_integer_ptype!(codes_primitive.ptype(), |P| { |
| 140 | + let codes = codes_primitive.as_slice::<P>(); |
| 141 | + codes_validity |
| 142 | + .to_bit_buffer() |
| 143 | + .set_indices() |
| 144 | + .for_each(|idx| { |
| 145 | + unreferenced_vec[codes[idx] as usize] = false; |
| 146 | + }); |
| 147 | + }) |
134 | 148 | } |
135 | 149 |
|
136 | | - match_each_integer_ptype!(codes_primitive.ptype(), |P| { |
137 | | - let codes = codes_primitive.as_slice::<P>(); |
138 | | - codes_validity |
139 | | - .to_bit_buffer() |
140 | | - .set_indices() |
141 | | - .for_each(|idx| { |
142 | | - #[allow(clippy::cast_possible_truncation)] |
143 | | - unreferenced.unset(codes[idx] as usize); |
144 | | - }); |
145 | | - Ok(unreferenced.freeze()) |
146 | | - }) |
| 150 | + Ok(BitBuffer::collect_bool(values_len, |idx| { |
| 151 | + unreferenced_vec[idx] |
| 152 | + })) |
147 | 153 | } |
148 | 154 | } |
149 | 155 |
|
|
0 commit comments