|
18 | 18 | //! End-to-end check: (almost) every sample from apache/parquet-testing/variant |
19 | 19 | //! can be parsed into our `Variant`. |
20 | 20 |
|
21 | | -// NOTE: We keep this file separate rather than a test mod inside variant.rs because it should be |
22 | | -// moved to the test folder later |
23 | | -use std::fs; |
24 | 21 | use std::path::{Path, PathBuf}; |
| 22 | +use std::{env, fs}; |
25 | 23 |
|
26 | 24 | use chrono::NaiveDate; |
27 | 25 | use parquet_variant::{ |
28 | 26 | ShortString, Variant, VariantBuilder, VariantDecimal16, VariantDecimal4, VariantDecimal8, |
29 | 27 | }; |
30 | 28 |
|
| 29 | +/// Returns a directory path for the parquet variant test data. |
| 30 | +/// |
| 31 | +/// The data lives in the `parquet-testing` git repository: |
| 32 | +/// <https://github.com/apache/parquet-testing> |
| 33 | +/// |
| 34 | +/// Normally this is checked out as a git submodule in the root of the `arrow-rs` repository, |
| 35 | +/// so the relative path is |
| 36 | +/// * `CARGO_MANIFEST_DIR/../parquet-testing/variant`. |
| 37 | +/// |
| 38 | +/// However, the user can override this by setting the environment variable `PARQUET_TEST_DATA` |
| 39 | +/// to point to a different directory (as is done by the `verify-release-candidate.sh` script). |
| 40 | +/// |
| 41 | +/// In this case, the environment variable `PARQUET_TEST_DATA` is expected to point to a directory |
| 42 | +/// `parquet-testing/data`, so the relative path to the `variant` subdirectory is |
| 43 | +/// * `PARQUET_TEST_DATA/../variant`. |
31 | 44 | fn cases_dir() -> PathBuf { |
32 | | - Path::new(env!("CARGO_MANIFEST_DIR")) |
| 45 | + // Prefer the `PARQUET_TEST_DATA` override, which we expect to point at "../parquet-testing/data" |
| 46 | + let env_name = "PARQUET_TEST_DATA"; |
| 47 | + if let Ok(dir) = env::var(env_name) { |
| 48 | + let trimmed = dir.trim(); |
| 49 | + if !trimmed.is_empty() { |
| 50 | + let pb = PathBuf::from(trimmed).join("..").join("variant"); |
| 51 | + if pb.is_dir() { |
| 52 | + return pb; |
| 53 | + } else { |
| 54 | + panic!( |
| 55 | + "Can't find variant data at `{pb:?}`. Used value of env `{env_name}`../variant ", |
| 56 | + ) |
| 57 | + } |
| 58 | + } |
| 59 | + } |
| 60 | + |
| 61 | + // PARQUET_TEST_DATA is unset (or not valid Unicode) or blank after trimming; fall back to the default dir. |
| 62 | + |
| 63 | + // `CARGO_MANIFEST_DIR` is "the directory containing the manifest of your package"; |
| 64 | + // Cargo sets it for the build and `env!` reads it at compile time, see: |
| 65 | + // https://doc.rust-lang.org/cargo/reference/environment-variables.html |
| 66 | + let pb = Path::new(env!("CARGO_MANIFEST_DIR")) |
33 | 67 | .join("..") |
34 | 68 | .join("parquet-testing") |
35 | | - .join("variant") |
| 69 | + .join("variant"); |
| 70 | + |
| 71 | + if pb.is_dir() { |
| 72 | + pb |
| 73 | + } else { |
| 74 | + panic!( |
| 75 | + "env `{env_name}` is undefined or has empty value, and \ |
| 76 | + `CARGO_MANIFEST_DIR/../parquet-testing/variant` is not a directory: `{pb:?}`\n\ |
| 77 | + HINT: try running `git submodule update --init`", |
| 78 | + ) |
| 79 | + } |
36 | 80 | } |
37 | 81 |
|
38 | 82 | struct Case { |
|
0 commit comments