|
15 | 15 | // specific language governing permissions and limitations |
16 | 16 | // under the License. |
17 | 17 |
|
18 | | -use std::sync::{Arc, OnceLock}; |
| 18 | +use std::sync::{Arc, LazyLock}; |
19 | 19 |
|
20 | 20 | use arrow::array::{Array, RecordBatch, StringArray}; |
21 | 21 | use arrow::datatypes::{DataType, Field, Schema}; |
@@ -204,65 +204,24 @@ impl Utf8Test { |
204 | 204 |
|
205 | 205 | /// all combinations of interesting characters with lengths ranging from 1 to 3 |
206 | 206 | fn values() -> &'static [String] { |
207 | | - VALUES.get_or_init(|| { |
208 | | - let mut rng = rand::thread_rng(); |
209 | | - |
210 | | - let characters = [ |
211 | | - "z", |
212 | | - "0", |
213 | | - "~", |
214 | | - "ß", |
215 | | - "℣", |
216 | | - "%", // this one is useful for like/not like tests since it will result in randomly inserted wildcards |
217 | | - "_", // this one is useful for like/not like tests since it will result in randomly inserted wildcards |
218 | | - "\u{7F}", |
219 | | - "\u{7FF}", |
220 | | - "\u{FF}", |
221 | | - "\u{10FFFF}", |
222 | | - "\u{D7FF}", |
223 | | - "\u{FDCF}", |
224 | | - // null character |
225 | | - "\u{0}", |
226 | | - ]; |
227 | | - let value_lengths = [1, 2, 3]; |
228 | | - let mut values = vec![]; |
229 | | - for length in &value_lengths { |
230 | | - values.extend( |
231 | | - characters |
232 | | - .iter() |
233 | | - .cloned() |
234 | | - .combinations(*length) |
235 | | - // now get all permutations of each combination |
236 | | - .flat_map(|c| c.into_iter().permutations(*length)) |
237 | | - // and join them into strings |
238 | | - .map(|c| c.join("")), |
239 | | - ); |
240 | | - } |
241 | | - println!("Generated {} values", values.len()); |
242 | | - // randomly pick 100 values |
243 | | - values.shuffle(&mut rng); |
244 | | - values.truncate(100); |
245 | | - values |
246 | | - }) |
| 207 | + &VALUES |
247 | 208 | } |
248 | 209 |
|
249 | 210 | /// return the in memory object store |
250 | 211 | fn memory_store() -> &'static Arc<dyn ObjectStore> { |
251 | | - MEMORY_STORE.get_or_init(|| Arc::new(InMemory::new())) |
| 212 | + &MEMORY_STORE |
252 | 213 | } |
253 | 214 |
|
254 | 215 | /// return the schema of the created test files |
255 | 216 | fn schema() -> Arc<Schema> { |
256 | | - let schema = SCHEMA.get_or_init(|| { |
257 | | - Arc::new(Schema::new(vec![Field::new("a", DataType::Utf8, false)])) |
258 | | - }); |
| 217 | + let schema = &SCHEMA; |
259 | 218 | Arc::clone(schema) |
260 | 219 | } |
261 | 220 |
|
262 | 221 | /// Return a list of test files with UTF8 data and combinations of |
263 | 222 | /// [`Self::values`] |
264 | 223 | async fn test_files() -> Vec<TestFile> { |
265 | | - let files_mutex = TESTFILES.get_or_init(|| Mutex::new(vec![])); |
| 224 | + let files_mutex = &TESTFILES; |
266 | 225 | let mut files = files_mutex.lock().await; |
267 | 226 | if !files.is_empty() { |
268 | 227 | return (*files).clone(); |
@@ -385,16 +344,57 @@ async fn write_parquet_file( |
385 | 344 | } |
386 | 345 |
|
387 | 346 | /// The string values for [Utf8Test::values] |
388 | | -static VALUES: OnceLock<Vec<String>> = OnceLock::new(); |
| 347 | +static VALUES: LazyLock<Vec<String>> = LazyLock::new(|| { |
| 348 | + let mut rng = rand::thread_rng(); |
| 349 | + |
| 350 | + let characters = [ |
| 351 | + "z", |
| 352 | + "0", |
| 353 | + "~", |
| 354 | + "ß", |
| 355 | + "℣", |
| 356 | + "%", // this one is useful for like/not like tests since it will result in randomly inserted wildcards |
| 357 | + "_", // this one is useful for like/not like tests since it will result in randomly inserted wildcards |
| 358 | + "\u{7F}", |
| 359 | + "\u{7FF}", |
| 360 | + "\u{FF}", |
| 361 | + "\u{10FFFF}", |
| 362 | + "\u{D7FF}", |
| 363 | + "\u{FDCF}", |
| 364 | + // null character |
| 365 | + "\u{0}", |
| 366 | + ]; |
| 367 | + let value_lengths = [1, 2, 3]; |
| 368 | + let mut values = vec![]; |
| 369 | + for length in &value_lengths { |
| 370 | + values.extend( |
| 371 | + characters |
| 372 | + .iter() |
| 373 | + .cloned() |
| 374 | + .combinations(*length) |
| 375 | + // now get all permutations of each combination |
| 376 | + .flat_map(|c| c.into_iter().permutations(*length)) |
| 377 | + // and join them into strings |
| 378 | + .map(|c| c.join("")), |
| 379 | + ); |
| 380 | + } |
| 381 | + println!("Generated {} values", values.len()); |
| 382 | + // randomly pick 100 values |
| 383 | + values.shuffle(&mut rng); |
| 384 | + values.truncate(100); |
| 385 | + values |
| 386 | +}); |
389 | 387 | /// The schema for the [Utf8Test::schema] |
390 | | -static SCHEMA: OnceLock<Arc<Schema>> = OnceLock::new(); |
| 388 | +static SCHEMA: LazyLock<Arc<Schema>> = |
| 389 | + LazyLock::new(|| Arc::new(Schema::new(vec![Field::new("a", DataType::Utf8, false)]))); |
391 | 390 |
|
392 | 391 | /// The InMemory object store |
393 | | -static MEMORY_STORE: OnceLock<Arc<dyn ObjectStore>> = OnceLock::new(); |
| 392 | +static MEMORY_STORE: LazyLock<Arc<dyn ObjectStore>> = |
| 393 | + LazyLock::new(|| Arc::new(InMemory::new())); |
394 | 394 |
|
395 | 395 | /// List of in memory parquet files with UTF8 data |
396 | | -// Use a mutex rather than OnceLock to allow for async initialization |
397 | | -static TESTFILES: OnceLock<Mutex<Vec<TestFile>>> = OnceLock::new(); |
| 396 | +// Wrap the list in a mutex inside the LazyLock so it can be filled in asynchronously on first use |
| 397 | +static TESTFILES: LazyLock<Mutex<Vec<TestFile>>> = LazyLock::new(|| Mutex::new(vec![])); |
398 | 398 |
|
399 | 399 | /// Holds a temporary parquet file path and its size |
400 | 400 | #[derive(Debug, Clone)] |
|
0 commit comments