From 688e80be71185242442b0cd6466a676433375475 Mon Sep 17 00:00:00 2001 From: Yu Ishikawa Date: Tue, 15 Jul 2025 10:28:15 +0900 Subject: [PATCH 1/6] fix: Enable to parse YAML with `--vars` Signed-off-by: Yu Ishikawa --- .../unreleased/Fixes-20250715-102711.yaml | 3 + crates/dbt-common/src/io_args.rs | 106 ++++++++++++++++-- 2 files changed, 99 insertions(+), 10 deletions(-) create mode 100644 .changes/unreleased/Fixes-20250715-102711.yaml diff --git a/.changes/unreleased/Fixes-20250715-102711.yaml b/.changes/unreleased/Fixes-20250715-102711.yaml new file mode 100644 index 00000000..51398a06 --- /dev/null +++ b/.changes/unreleased/Fixes-20250715-102711.yaml @@ -0,0 +1,3 @@ +kind: Fixes +body: Enable to parse YAML with --vars +time: 2025-07-15T10:27:11.787166+09:00 diff --git a/crates/dbt-common/src/io_args.rs b/crates/dbt-common/src/io_args.rs index 2ca3b52e..37efc395 100644 --- a/crates/dbt-common/src/io_args.rs +++ b/crates/dbt-common/src/io_args.rs @@ -603,20 +603,22 @@ pub fn check_var(vars: &str) -> Result, String> { return Err("Empty vars input is not valid".into()); } - // Strip outer quotes if present - let vars = vars.trim().trim_matches('\''); - - // Check if the input is already wrapped in curly braces - let yaml_str = if vars.trim().starts_with('{') { - vars.to_string() + let path = Path::new(vars); + let yaml_str = if path.is_file() { + match fs::read_to_string(path) { + Ok(content) => content, + Err(err) => return Err(format!("Failed to read file '{}': {}", vars, err)), + } } else { - // Handle single key-value pair separated by a colon - if vars.trim().matches(':').count() != 1 { + // For direct string input, enforce 'key: value' or '{key: value, ...}' format + let trimmed_vars = vars.trim().trim_matches('\''); + if !trimmed_vars.starts_with('{') && trimmed_vars.matches(':').count() != 1 { return Err(format!( - "Invalid key-value pair: '{vars}'. Expected format: 'key: value'." + "Invalid key-value pair: '{}'. Expected format: 'key: value'.", + vars )); } - vars.to_string() + trimmed_vars.to_string() }; // Try parsing as YAML first @@ -737,4 +739,88 @@ mod tests { assert!(check_var(var).is_err(), "Should have failed: {var}"); } } + + #[test] + fn test_check_var_from_yaml_file() { + let file_content = r#" +key1: value1 +key2: + nested_key: nested_value +list_key: + - item1 + - item2 + "#; + let file_path = "test_vars.yml"; + std::fs::write(file_path, file_content).unwrap(); + + let result = check_var(file_path).unwrap(); + let expected_result = BTreeMap::from([ + ( + "key1".to_string(), + dbt_serde_yaml::from_str("value1").unwrap(), + ), + ( + "key2".to_string(), + dbt_serde_yaml::from_str("{nested_key: nested_value}").unwrap(), + ), + ( + "list_key".to_string(), + dbt_serde_yaml::from_str("[\"item1\", \"item2\"]").unwrap(), + ), + ]); + + assert_eq!(result, expected_result); + std::fs::remove_file(file_path).unwrap(); + } + + #[test] + fn test_check_var_from_json_file() { + let file_content = r#"{ + "key1": "value1", + "key2": { + "nested_key": "nested_value" + }, + "list_key": [ + "item1", + "item2" + ] + }"#; + let file_path = "test_vars.json"; + std::fs::write(file_path, file_content).unwrap(); + + let result = check_var(file_path).unwrap(); + let expected_result = BTreeMap::from([ + ( + "key1".to_string(), + serde_json::from_str("\"value1\"").unwrap(), + ), + ( + "key2".to_string(), + serde_json::from_str("{\"nested_key\": \"nested_value\"}").unwrap(), + ), + ( + "list_key".to_string(), + serde_json::from_str("[\"item1\", \"item2\"]").unwrap(), + ), + ]); + + assert_eq!(result, expected_result); + std::fs::remove_file(file_path).unwrap(); + } + + #[test] + fn test_check_var_from_nonexistent_file() { + let file_path = "non_existent_file.yml"; + assert!(check_var(file_path).is_err()); + } + + #[test] + fn test_check_var_from_invalid_file_content() { + let file_content = "this is not valid yaml or json"; + let file_path = "invalid_content.yml"; + std::fs::write(file_path, file_content).unwrap(); + + assert!(check_var(file_path).is_err()); + std::fs::remove_file(file_path).unwrap(); + } } From a589f8cefbad755c4f4fbbe618260dabaa248de5 Mon Sep 17 00:00:00 2001 From: Yu Ishikawa Date: Tue, 15 Jul 2025 11:36:01 +0900 Subject: [PATCH 2/6] Improve `check_var` Signed-off-by: Yu Ishikawa --- crates/dbt-common/src/io_args.rs | 270 +++++++++++++++++++++++++++---- 1 file changed, 243 insertions(+), 27 deletions(-) diff --git a/crates/dbt-common/src/io_args.rs b/crates/dbt-common/src/io_args.rs index 2ca3b52e..dd0cce7d 100644 --- a/crates/dbt-common/src/io_args.rs +++ b/crates/dbt-common/src/io_args.rs @@ -606,36 +606,16 @@ pub fn check_var(vars: &str) -> Result, String> { // Strip outer quotes if present let vars = vars.trim().trim_matches('\''); - // Check if the input is already wrapped in curly braces - let yaml_str = if vars.trim().starts_with('{') { - vars.to_string() - } else { - // Handle single key-value pair separated by a colon - if vars.trim().matches(':').count() != 1 { - return Err(format!( - "Invalid key-value pair: '{vars}'. Expected format: 'key: value'." - )); - } - vars.to_string() - }; - // Try parsing as YAML first - match dbt_serde_yaml::from_str::>(&yaml_str) { + match dbt_serde_yaml::from_str::>(vars) { Ok(btree) => { // Disallow the '{key:value}' format for flow-style YAML syntax // to prevent key:value: None interpretation: https://stackoverflow.com/a/70909331 - for key in btree.keys() { - if key.contains(':') { - return Err(format!( - "Invalid key-value pair: '{key}'. Value must start with a space after colon." - )); - } - } Ok(btree) } Err(_) => { // If YAML parsing fails, try JSON - match serde_json::from_str(&yaml_str) { + match serde_json::from_str(vars) { Ok(btree) => Ok(btree), Err(_) => Err( "Invalid YAML/JSON format. Expected format: 'key: value' or '{key: value, ..}'. Note both argument forms must be just one shell token" @@ -725,16 +705,252 @@ mod tests { } #[test] - fn test_check_var_invalid() { + fn test_check_var_invalid_old() { let invalid_vars = vec![ - "key", // Missing colon - "key:value", // Missing space after colon - "key: value:with:colons", // Value with colons - "{key:value}", // Flow-style YAML syntax without space after colon + "key", // Missing colon or not a map + "key:value", // Missing space after colon in key-value pair ]; for var in invalid_vars { assert!(check_var(var).is_err(), "Should have failed: {var}"); } } + + #[test] + fn test_check_var_from_yaml_file() { + let file_content = r#" +key1: value1 +key2: + nested_key: nested_value +list_key: + - item1 + - item2 + "#; + let result = check_var(file_content).unwrap(); + let expected_result = BTreeMap::from([ + ( + "key1".to_string(), + dbt_serde_yaml::from_str("value1").unwrap(), + ), + ( + "key2".to_string(), + dbt_serde_yaml::from_str("{nested_key: nested_value}").unwrap(), + ), + ( + "list_key".to_string(), + dbt_serde_yaml::from_str("[\"item1\", \"item2\"]").unwrap(), + ), + ]); + + assert_eq!(result, expected_result); + } + + #[test] + fn test_check_var_from_json_file() { + let file_content = r#"{ + "key1": "value1", + "key2": { + "nested_key": "nested_value" + }, + "list_key": [ + "item1", + "item2" + ] + }"#; + let result = check_var(file_content).unwrap(); + let expected_result = BTreeMap::from([ + ( + "key1".to_string(), + serde_json::from_str("\"value1\"").unwrap(), + ), + ( + "key2".to_string(), + serde_json::from_str("{\"nested_key\": \"nested_value\"}").unwrap(), + ), + ( + "list_key".to_string(), + serde_json::from_str("[\"item1\", \"item2\"]").unwrap(), + ), + ]); + + assert_eq!(result, expected_result); + } + + #[test] + fn test_check_var_with_yaml_scalars() { + let test_cases = vec![ + ("key: \"hello world\"", "hello world"), + ("key: 123", "123"), + ("key: true", "true"), + ("key: null", "null"), + ]; + + for (input, expected_value) in test_cases { + let result = check_var(input).unwrap(); + let expected = BTreeMap::from([( + "key".to_string(), + dbt_serde_yaml::from_str(expected_value).unwrap(), + )]); + assert_eq!(result, expected, "Failed for input: {}", input); + } + } + + #[test] + fn test_check_var_with_yaml_nested_maps_and_lists() { + let input = r#" +parent_key: + child_key: value + list_key: + - item1 + - item2 + "#; + let result = check_var(input).unwrap(); + let expected = BTreeMap::from([( + "parent_key".to_string(), + dbt_serde_yaml::from_str( + r#"{ + child_key: value, + list_key: [item1, item2] + }"#, + ) + .unwrap(), + )]); + assert_eq!(result, expected, "Failed for nested YAML input"); + + let input_complex = r#" +data: + users: + - id: 1 + name: Alice + - id: 2 + name: Bob + products: + - name: Laptop + price: 1200 + - name: Mouse + price: 25 + "#; + let result_complex = check_var(input_complex).unwrap(); + let expected_complex = BTreeMap::from([( + "data".to_string(), + dbt_serde_yaml::from_str( + r#"{ + users: [{id: 1, name: Alice}, {id: 2, name: Bob}], + products: [{name: Laptop, price: 1200}, {name: Mouse, price: 25}] + }"#, + ) + .unwrap(), + )]); + assert_eq!(result_complex, expected_complex, "Failed for complex nested YAML input"); + } + + #[test] + fn test_check_var_with_json_scalars() { + let test_cases = vec![ + (r#"{ "key": "hello world" }"#, r#""hello world""#), + (r#"{ "key": 123 }"#, r#"123"#), + (r#"{ "key": true }"#, r#"true"#), + (r#"{ "key": null }"#, r#"null"#), + ]; + + for (input, expected_value) in test_cases { + let result = check_var(input).unwrap(); + let expected = BTreeMap::from([( + "key".to_string(), + serde_json::from_str(expected_value).unwrap(), + )]); + assert_eq!(result, expected, "Failed for input: {}", input); + } + } + + #[test] + fn test_check_var_with_json_nested_maps_and_lists() { + let input = r#"{ + "parent_key": { + "child_key": "value", + "list_key": [ + "item1", + "item2" + ] + } + }"#; + let result = check_var(input).unwrap(); + let expected = BTreeMap::from([( + "parent_key".to_string(), + serde_json::from_str( + r#"{ + "child_key": "value", + "list_key": ["item1", "item2"] + }"#, + ) + .unwrap(), + )]); + assert_eq!(result, expected, "Failed for nested JSON input"); + + let input_complex = r#"{ + "data": { + "users": [ + {"id": 1, "name": "Alice"}, + {"id": 2, "name": "Bob"} + ], + "products": [ + {"name": "Laptop", "price": 1200}, + {"name": "Mouse", "price": 25} + ] + } + }"#; + let result_complex = check_var(input_complex).unwrap(); + let expected_complex = BTreeMap::from([( + "data".to_string(), + serde_json::from_str( + r#"{ + "users": [{"id": 1, "name": "Alice"}, {"id": 2, "name": "Bob"}], + "products": [{"name": "Laptop", "price": 1200}, {"name": "Mouse", "price": 25}] + }"#, + ) + .unwrap(), + )]); + assert_eq!(result_complex, expected_complex, "Failed for complex nested JSON input"); + } + + #[test] + fn test_check_var_with_mixed_yaml_styles() { + let input = r#" +key1: value1 +key2: [value21, value22, value23] +key3: {key31: value31, key32: value32} + "#; + let result = check_var(input).unwrap(); + + let expected = BTreeMap::from([ + ("key1".to_string(), dbt_serde_yaml::from_str("value1").unwrap()), + ("key2".to_string(), dbt_serde_yaml::from_str("[value21, value22, value23]").unwrap()), + ("key3".to_string(), dbt_serde_yaml::from_str("{key31: value31, key32: value32}").unwrap()), + ]); + + assert_eq!(result, expected, "Failed for mixed YAML styles"); + } + + #[test] + fn test_check_var_invalid_new_cases() { + let invalid_vars = vec![ + "", // Empty string + " ", // Whitespace only + ": key", // Invalid YAML (starts with colon) + "key value", // Missing colon + "{", // Incomplete JSON/YAML + "key: [", // Incomplete YAML list + "key: {", // Incomplete YAML map + "123", // Not a map + "true", // Not a map + "key: - value", // YAML sequence instead of map + "- item", // YAML sequence instead of map + "[1,2,3]", // JSON array instead of map + ]; + + for var in invalid_vars { + assert!(check_var(var).is_err(), "Should have failed for input: {}", var); + } + } + } From 488ae7a282ff3fe32100c2f15c454d40ffab5d19 Mon Sep 17 00:00:00 2001 From: Yu Ishikawa Date: Tue, 15 Jul 2025 13:37:50 +0900 Subject: [PATCH 3/6] Update Signed-off-by: Yu Ishikawa --- crates/dbt-common/src/io_args.rs | 454 +++++++++++++++---------------- 1 file changed, 224 insertions(+), 230 deletions(-) diff --git a/crates/dbt-common/src/io_args.rs b/crates/dbt-common/src/io_args.rs index dd0cce7d..7599b5af 100644 --- a/crates/dbt-common/src/io_args.rs +++ b/crates/dbt-common/src/io_args.rs @@ -609,18 +609,36 @@ pub fn check_var(vars: &str) -> Result, String> { // Try parsing as YAML first match dbt_serde_yaml::from_str::>(vars) { Ok(btree) => { + // Check for flow-style YAML without space after colon, e.g., '{key:value}' + // This is an invalid YAML syntax according to the spec but might be leniently parsed by some libraries. + // We want to explicitly disallow it to prevent incorrect interpretations (like 'value' being null). + if vars.starts_with('{') && vars.contains(':') { + let potential_pairs: Vec<&str> = vars.trim_matches('{').trim_matches('}').split(',').collect(); + for pair in potential_pairs { + let trimmed_pair = pair.trim(); // Trim leading/trailing whitespace from each pair + if let Some(colon_idx) = trimmed_pair.find(':') { + // Check if the character immediately after the colon is not a space + // and ensure it's not the end of the string (e.g., "key:") + if colon_idx + 1 < trimmed_pair.len() && trimmed_pair.chars().nth(colon_idx + 1) != Some(' ') { + return Err("Invalid YAML format: Missing space after colon in flow-style mapping. Example: '{key: value}'".to_string()); + } + } + } + } // Disallow the '{key:value}' format for flow-style YAML syntax // to prevent key:value: None interpretation: https://stackoverflow.com/a/70909331 Ok(btree) } - Err(_) => { + Err(yaml_err) => { // If YAML parsing fails, try JSON match serde_json::from_str(vars) { Ok(btree) => Ok(btree), - Err(_) => Err( - "Invalid YAML/JSON format. Expected format: 'key: value' or '{key: value, ..}'. Note both argument forms must be just one shell token" - .to_string(), - ), + Err(json_err) => Err(format!( + "Invalid YAML/JSON format. Expected format: 'key: value' or '{{key: value, ..}}'. +YAML parsing error: {} +JSON parsing error: {}", + yaml_err, json_err + )), } } } @@ -664,293 +682,269 @@ pub fn check_env_var(vars: &str) -> Result, String> { #[cfg(test)] mod tests { use super::*; + use dbt_serde_yaml::Value; // Make sure Value is imported for test assertions + use tempfile; // Import the tempfile crate - #[test] - fn test_check_single_var() { - let result = check_var("key: value").unwrap(); - let expected_result = BTreeMap::from([( - "key".to_string(), - dbt_serde_yaml::from_str("value").unwrap(), - )]); + // Helper function to create a Value from a YAML string for expected results + fn yaml_value(s: &str) -> Value { + dbt_serde_yaml::from_str(s).unwrap() + } - assert_eq!(result, expected_result); + // Helper function to create a Value from a JSON string for expected results + fn json_value(s: &str) -> Value { + serde_json::from_str(s).unwrap() } + // ------------------------------------------------------------------------------------------ + // Tests for Valid YAML Inputs #[test] - fn test_check_single_bracket_var() { - let result = check_var("{key: value}").unwrap(); - let expected_result = BTreeMap::from([( - "key".to_string(), - dbt_serde_yaml::from_str("value").unwrap(), - )]); - - assert_eq!(result, expected_result); + fn test_check_var_valid_yaml_single_pair() { + let input = "key: value"; + let result = check_var(input).unwrap(); + let expected = BTreeMap::from([("key".to_string(), yaml_value("value"))]); + assert_eq!(result, expected); } #[test] - fn test_check_multiple_bracket_var() { - let result = check_var("{key: value, key2: value2}").unwrap(); - let expected_result = BTreeMap::from([ - ( - "key".to_string(), - dbt_serde_yaml::from_str("value").unwrap(), - ), - ( - "key2".to_string(), - dbt_serde_yaml::from_str("value2").unwrap(), - ), + fn test_check_var_valid_yaml_multiple_pairs() { + let input = r#" +key1: value1 +key2: value2 + "#; + let result = check_var(input).unwrap(); + let expected = BTreeMap::from([ + ("key1".to_string(), yaml_value("value1")), + ("key2".to_string(), yaml_value("value2")), ]); + assert_eq!(result, expected); + } - assert_eq!(result, expected_result); + #[test] + fn test_check_var_valid_yaml_nested_structures() { + let input = r#" +parent_key: + child_key: value + list_key: + - item1 + - item2 + "#; + let result = check_var(input).unwrap(); + let expected = BTreeMap::from([( "parent_key".to_string(), yaml_value(r#" + child_key: value + list_key: + - item1 + - item2 + "#),)]); + assert_eq!(result, expected); } #[test] - fn test_check_var_invalid_old() { - let invalid_vars = vec![ - "key", // Missing colon or not a map - "key:value", // Missing space after colon in key-value pair + fn test_check_var_valid_yaml_scalars() { + let test_cases = vec![ + ("key: \"hello world\"", "\"hello world\""), // String + ("key: 123", "123"), // Integer + ("key: true", "true"), // Boolean + ("key: null", "null"), // Null ]; - for var in invalid_vars { - assert!(check_var(var).is_err(), "Should have failed: {var}"); + for (input, expected_value_yaml) in test_cases { + let result = check_var(input).unwrap(); + let expected = BTreeMap::from([( "key".to_string(), yaml_value(expected_value_yaml),)]); + assert_eq!(result, expected, "Failed for input: {}", input); } } #[test] - fn test_check_var_from_yaml_file() { - let file_content = r#" -key1: value1 -key2: - nested_key: nested_value -list_key: - - item1 - - item2 - "#; - let result = check_var(file_content).unwrap(); - let expected_result = BTreeMap::from([ - ( - "key1".to_string(), - dbt_serde_yaml::from_str("value1").unwrap(), - ), - ( - "key2".to_string(), - dbt_serde_yaml::from_str("{nested_key: nested_value}").unwrap(), - ), - ( - "list_key".to_string(), - dbt_serde_yaml::from_str("[\"item1\", \"item2\"]").unwrap(), - ), + fn test_check_var_valid_yaml_flow_style_with_space() { + let input = "{key: value}"; + let result = check_var(input).unwrap(); + let expected = BTreeMap::from([("key".to_string(), yaml_value("value"))]); + assert_eq!(result, expected); + + let input_multiple = "{key1: value1, key2: value2}"; + let result_multiple = check_var(input_multiple).unwrap(); + let expected_multiple = BTreeMap::from([ + ("key1".to_string(), yaml_value("value1")), + ("key2".to_string(), yaml_value("value2")), ]); + assert_eq!(result_multiple, expected_multiple); + } - assert_eq!(result, expected_result); + // ------------------------------------------------------------------------------------------ + // Tests for Valid JSON Inputs + #[test] + fn test_check_var_valid_json_single_pair() { + let input = r#"{"key": "value"}"#; + let result = check_var(input).unwrap(); + let expected = BTreeMap::from([("key".to_string(), json_value(r#""value""#))]); + assert_eq!(result, expected); } #[test] - fn test_check_var_from_json_file() { - let file_content = r#"{ + fn test_check_var_valid_json_multiple_pairs() { + let input = r#"{ "key1": "value1", - "key2": { - "nested_key": "nested_value" - }, - "list_key": [ - "item1", - "item2" - ] + "key2": "value2" }"#; - let result = check_var(file_content).unwrap(); - let expected_result = BTreeMap::from([ - ( - "key1".to_string(), - serde_json::from_str("\"value1\"").unwrap(), - ), - ( - "key2".to_string(), - serde_json::from_str("{\"nested_key\": \"nested_value\"}").unwrap(), - ), - ( - "list_key".to_string(), - serde_json::from_str("[\"item1\", \"item2\"]").unwrap(), - ), + let result = check_var(input).unwrap(); + let expected = BTreeMap::from([ + ("key1".to_string(), json_value(r#""value1""#)), + ("key2".to_string(), json_value(r#""value2""#)), ]); + assert_eq!(result, expected); + } - assert_eq!(result, expected_result); + #[test] + fn test_check_var_valid_json_nested_structures() { + let input = r#"{ + "parent_key": { + "child_key": "value", + "list_key": [ + "item1", + "item2" + ] + } + }"#; + let result = check_var(input).unwrap(); + let expected = BTreeMap::from([( "parent_key".to_string(), json_value(r#"{ + "child_key": "value", + "list_key": [ + "item1", + "item2" + ] + }"#),)]); + assert_eq!(result, expected); } #[test] - fn test_check_var_with_yaml_scalars() { + fn test_check_var_valid_json_scalars() { let test_cases = vec![ - ("key: \"hello world\"", "hello world"), - ("key: 123", "123"), - ("key: true", "true"), - ("key: null", "null"), + (r#"{"key": "hello world"}"#, r#""hello world""#), // String + (r#"{"key": 123}"#, r#"123"#), // Number + (r#"{"key": true}"#, r#"true"#), // Boolean + (r#"{"key": null}"#, r#"null"#), // Null ]; - for (input, expected_value) in test_cases { + for (input, expected_value_json) in test_cases { let result = check_var(input).unwrap(); - let expected = BTreeMap::from([( - "key".to_string(), - dbt_serde_yaml::from_str(expected_value).unwrap(), - )]); + let expected = BTreeMap::from([( "key".to_string(), json_value(expected_value_json),)]); assert_eq!(result, expected, "Failed for input: {}", input); } } + // ------------------------------------------------------------------------------------------ + // Tests for Disallowed Flow-Style YAML (Missing Space) #[test] - fn test_check_var_with_yaml_nested_maps_and_lists() { - let input = r#" -parent_key: - child_key: value - list_key: - - item1 - - item2 - "#; - let result = check_var(input).unwrap(); - let expected = BTreeMap::from([( - "parent_key".to_string(), - dbt_serde_yaml::from_str( - r#"{ - child_key: value, - list_key: [item1, item2] - }"#, - ) - .unwrap(), - )]); - assert_eq!(result, expected, "Failed for nested YAML input"); - - let input_complex = r#" -data: - users: - - id: 1 - name: Alice - - id: 2 - name: Bob - products: - - name: Laptop - price: 1200 - - name: Mouse - price: 25 - "#; - let result_complex = check_var(input_complex).unwrap(); - let expected_complex = BTreeMap::from([( - "data".to_string(), - dbt_serde_yaml::from_str( - r#"{ - users: [{id: 1, name: Alice}, {id: 2, name: Bob}], - products: [{name: Laptop, price: 1200}, {name: Mouse, price: 25}] - }"#, - ) - .unwrap(), - )]); - assert_eq!(result_complex, expected_complex, "Failed for complex nested YAML input"); + fn test_check_var_disallowed_flow_yaml_no_space_single() { + let input = "{key:value}"; + let result = check_var(input); + assert!(result.is_err()); + assert_eq!(result.unwrap_err(), "Invalid YAML format: Missing space after colon in flow-style mapping. Example: '{key: value}'"); } #[test] - fn test_check_var_with_json_scalars() { + fn test_check_var_disallowed_flow_yaml_no_space_multiple() { + let input = "{key1:value1,key2:value2}"; + let result = check_var(input); + assert!(result.is_err()); + assert_eq!(result.unwrap_err(), "Invalid YAML format: Missing space after colon in flow-style mapping. Example: '{key: value}'"); + } + + #[test] + fn test_check_var_disallowed_flow_yaml_mixed_space() { + let input = "{key1:value1, key2: value2}"; + let result = check_var(input); + assert!(result.is_err()); + assert_eq!(result.unwrap_err(), "Invalid YAML format: Missing space after colon in flow-style mapping. Example: '{key: value}'"); + } + + // ------------------------------------------------------------------------------------------ + // Tests for Other Invalid Inputs + #[test] + fn test_check_var_invalid_empty_input() { + assert!(check_var("").is_err()); + assert_eq!(check_var("").unwrap_err(), "Empty vars input is not valid"); + + assert!(check_var(" ").is_err()); + assert_eq!(check_var(" ").unwrap_err(), "Empty vars input is not valid"); + } + + #[test] + fn test_check_var_invalid_non_map_root() { let test_cases = vec![ - (r#"{ "key": "hello world" }"#, r#""hello world""#), - (r#"{ "key": 123 }"#, r#"123"#), - (r#"{ "key": true }"#, r#"true"#), - (r#"{ "key": null }"#, r#"null"#), + "123", // Not a map (integer) + "true", // Not a map (boolean) + "- item", // YAML sequence + "[1,2,3]", // JSON array ]; - for (input, expected_value) in test_cases { - let result = check_var(input).unwrap(); - let expected = BTreeMap::from([( - "key".to_string(), - serde_json::from_str(expected_value).unwrap(), - )]); - assert_eq!(result, expected, "Failed for input: {}", input); + for input in test_cases { + let result = check_var(input); + assert!(result.is_err(), "Should have failed for non-map input: {}", input); + let error_message = result.unwrap_err(); + assert!(error_message.contains("YAML parsing error:")); + assert!(error_message.contains("JSON parsing error:")); } } #[test] - fn test_check_var_with_json_nested_maps_and_lists() { - let input = r#"{ - "parent_key": { - "child_key": "value", - "list_key": [ - "item1", - "item2" - ] - } - }"#; - let result = check_var(input).unwrap(); - let expected = BTreeMap::from([( - "parent_key".to_string(), - serde_json::from_str( - r#"{ - "child_key": "value", - "list_key": ["item1", "item2"] - }"#, - ) - .unwrap(), - )]); - assert_eq!(result, expected, "Failed for nested JSON input"); - - let input_complex = r#"{ - "data": { - "users": [ - {"id": 1, "name": "Alice"}, - {"id": 2, "name": "Bob"} - ], - "products": [ - {"name": "Laptop", "price": 1200}, - {"name": "Mouse", "price": 25} - ] - } - }"#; - let result_complex = check_var(input_complex).unwrap(); - let expected_complex = BTreeMap::from([( - "data".to_string(), - serde_json::from_str( - r#"{ - "users": [{"id": 1, "name": "Alice"}, {"id": 2, "name": "Bob"}], - "products": [{"name": "Laptop", "price": 1200}, {"name": "Mouse", "price": 25}] - }"#, - ) - .unwrap(), - )]); - assert_eq!(result_complex, expected_complex, "Failed for complex nested JSON input"); + fn test_check_var_invalid_incomplete_structures() { + let test_cases = vec![ + "{", // Incomplete JSON/YAML object + "key: [", // Incomplete YAML list + "key: {", // Incomplete YAML map + "key", // Missing colon or not a map + "key value", // Missing colon + ]; + + for input in test_cases { + let result = check_var(input); + assert!(result.is_err(), "Should have failed for incomplete structure: {}", input); + let error_message = result.unwrap_err(); + assert!(error_message.contains("YAML parsing error:")); + assert!(error_message.contains("JSON parsing error:")); + } } + // Keep existing env_var tests if they are desired. + // Or move them to a separate test module if check_env_var becomes complex enough. #[test] - fn test_check_var_with_mixed_yaml_styles() { - let input = r#" + fn test_check_env_var_from_yaml_file() { + let temp_dir = tempfile::tempdir().unwrap(); + let file_path = temp_dir.path().join("test_env_vars.yml"); + let file_content = r#" key1: value1 -key2: [value21, value22, value23] -key3: {key31: value31, key32: value32} +key2: value2 "#; - let result = check_var(input).unwrap(); + fs::write(&file_path, file_content).unwrap(); - let expected = BTreeMap::from([ - ("key1".to_string(), dbt_serde_yaml::from_str("value1").unwrap()), - ("key2".to_string(), dbt_serde_yaml::from_str("[value21, value22, value23]").unwrap()), - ("key3".to_string(), dbt_serde_yaml::from_str("{key31: value31, key32: value32}").unwrap()), + let result = check_env_var(file_path.to_str().unwrap()).unwrap(); + let expected_result = HashMap::from([ + ("key1".to_string(), "value1".to_string()), + ("key2".to_string(), "value2".to_string()), ]); - assert_eq!(result, expected, "Failed for mixed YAML styles"); + assert_eq!(result, expected_result); } #[test] - fn test_check_var_invalid_new_cases() { - let invalid_vars = vec![ - "", // Empty string - " ", // Whitespace only - ": key", // Invalid YAML (starts with colon) - "key value", // Missing colon - "{", // Incomplete JSON/YAML - "key: [", // Incomplete YAML list - "key: {", // Incomplete YAML map - "123", // Not a map - "true", // Not a map - "key: - value", // YAML sequence instead of map - "- item", // YAML sequence instead of map - "[1,2,3]", // JSON array instead of map - ]; + fn test_check_env_var_file_wrong_extension() { + let temp_dir = tempfile::tempdir().unwrap(); + let file_path = temp_dir.path().join("test_env_vars.txt"); + fs::write(&file_path, "key: value").unwrap(); - for var in invalid_vars { - assert!(check_var(var).is_err(), "Should have failed for input: {}", var); - } + let result = check_env_var(file_path.to_str().unwrap()); + assert!(result.is_err()); + assert_eq!(result.unwrap_err(), "File must have a .yml extension"); } + #[test] + fn test_check_env_var_invalid_file_path() { + let result = check_env_var("non_existent_file.yml"); + assert!(result.is_err()); + assert_eq!( + result.unwrap_err(), + "Value must be a .yml file or a yml string like so: '{ dialect: trino }'" + ); + } } From 0dcb1d5477451c43bfce92f9f5652b1a976df274 Mon Sep 17 00:00:00 2001 From: Yu Ishikawa Date: Tue, 15 Jul 2025 21:26:59 +0900 Subject: [PATCH 4/6] update Signed-off-by: Yu Ishikawa --- crates/dbt-common/Cargo.toml | 3 +- crates/dbt-common/src/io_args.rs | 79 +++++++++++++++++++++----------- 2 files changed, 55 insertions(+), 27 deletions(-) diff --git a/crates/dbt-common/Cargo.toml b/crates/dbt-common/Cargo.toml index 92b4a9f1..d66218ca 100644 --- a/crates/dbt-common/Cargo.toml +++ b/crates/dbt-common/Cargo.toml @@ -43,7 +43,7 @@ schemars = { workspace = true } serde = { workspace = true, features = ["derive"] } serde_json = { workspace = true } stringmetrics = { workspace = true } -strum = { workspace = true } +strum = { workspace = true, features = ["derive"] } strum_macros = { workspace = true } term_size = { workspace = true } @@ -66,6 +66,7 @@ powershell_script = "1.1.0" [dev-dependencies] indoc = { workspace = true } +tempfile = { workspace = true } [lib] name = "dbt_common" diff --git a/crates/dbt-common/src/io_args.rs b/crates/dbt-common/src/io_args.rs index 7599b5af..825010f0 100644 --- a/crates/dbt-common/src/io_args.rs +++ b/crates/dbt-common/src/io_args.rs @@ -613,13 +613,19 @@ pub fn check_var(vars: &str) -> Result, String> { // This is an invalid YAML syntax according to the spec but might be leniently parsed by some libraries. // We want to explicitly disallow it to prevent incorrect interpretations (like 'value' being null). if vars.starts_with('{') && vars.contains(':') { - let potential_pairs: Vec<&str> = vars.trim_matches('{').trim_matches('}').split(',').collect(); + let potential_pairs: Vec<&str> = vars + .trim_matches('{') + .trim_matches('}') + .split(',') + .collect(); for pair in potential_pairs { let trimmed_pair = pair.trim(); // Trim leading/trailing whitespace from each pair if let Some(colon_idx) = trimmed_pair.find(':') { // Check if the character immediately after the colon is not a space // and ensure it's not the end of the string (e.g., "key:") - if colon_idx + 1 < trimmed_pair.len() && trimmed_pair.chars().nth(colon_idx + 1) != Some(' ') { + if colon_idx + 1 < trimmed_pair.len() + && trimmed_pair.chars().nth(colon_idx + 1) != Some(' ') + { return Err("Invalid YAML format: Missing space after colon in flow-style mapping. Example: '{key: value}'".to_string()); } } @@ -729,12 +735,17 @@ parent_key: - item2 "#; let result = check_var(input).unwrap(); - let expected = BTreeMap::from([( "parent_key".to_string(), yaml_value(r#" + let expected = BTreeMap::from([( + "parent_key".to_string(), + yaml_value( + r#" child_key: value list_key: - item1 - item2 - "#),)]); + "#, + ), + )]); assert_eq!(result, expected); } @@ -742,14 +753,14 @@ parent_key: fn test_check_var_valid_yaml_scalars() { let test_cases = vec![ ("key: \"hello world\"", "\"hello world\""), // String - ("key: 123", "123"), // Integer - ("key: true", "true"), // Boolean - ("key: null", "null"), // Null + ("key: 123", "123"), // Integer + ("key: true", "true"), // Boolean + ("key: null", "null"), // Null ]; for (input, expected_value_yaml) in test_cases { let result = check_var(input).unwrap(); - let expected = BTreeMap::from([( "key".to_string(), yaml_value(expected_value_yaml),)]); + let expected = BTreeMap::from([("key".to_string(), yaml_value(expected_value_yaml))]); assert_eq!(result, expected, "Failed for input: {}", input); } } @@ -806,13 +817,18 @@ parent_key: } }"#; let result = check_var(input).unwrap(); - let expected = BTreeMap::from([( "parent_key".to_string(), json_value(r#"{ + let expected = BTreeMap::from([( + "parent_key".to_string(), + json_value( + r#"{ "child_key": "value", "list_key": [ "item1", "item2" ] - }"#),)]); + }"#, + ), + )]); assert_eq!(result, expected); } @@ -820,14 +836,14 @@ parent_key: fn test_check_var_valid_json_scalars() { let test_cases = vec![ (r#"{"key": "hello world"}"#, r#""hello world""#), // String - (r#"{"key": 123}"#, r#"123"#), // Number - (r#"{"key": true}"#, r#"true"#), // Boolean - (r#"{"key": null}"#, r#"null"#), // Null + (r#"{"key": 123}"#, r#"123"#), // Number + (r#"{"key": true}"#, r#"true"#), // Boolean + (r#"{"key": null}"#, r#"null"#), // Null ]; for (input, expected_value_json) in test_cases { let result = check_var(input).unwrap(); - let expected = BTreeMap::from([( "key".to_string(), json_value(expected_value_json),)]); + let expected = BTreeMap::from([("key".to_string(), json_value(expected_value_json))]); assert_eq!(result, expected, "Failed for input: {}", input); } } @@ -866,21 +882,28 @@ parent_key: assert_eq!(check_var("").unwrap_err(), "Empty vars input is not valid"); assert!(check_var(" ").is_err()); - assert_eq!(check_var(" ").unwrap_err(), "Empty vars input is not valid"); + assert_eq!( + check_var(" ").unwrap_err(), + "Empty vars input is not valid" + ); } #[test] fn test_check_var_invalid_non_map_root() { let test_cases = vec![ - "123", // Not a map (integer) - "true", // Not a map (boolean) - "- item", // YAML sequence - "[1,2,3]", // JSON array + "123", // Not a map (integer) + "true", // Not a map (boolean) + "- item", // YAML sequence + "[1,2,3]", // JSON array ]; for input in test_cases { let result = check_var(input); - assert!(result.is_err(), "Should have failed for non-map input: {}", input); + assert!( + result.is_err(), + "Should have failed for non-map input: {}", + input + ); let error_message = result.unwrap_err(); assert!(error_message.contains("YAML parsing error:")); assert!(error_message.contains("JSON parsing error:")); @@ -890,16 +913,20 @@ parent_key: #[test] fn test_check_var_invalid_incomplete_structures() { let test_cases = vec![ - "{", // Incomplete JSON/YAML object - "key: [", // Incomplete YAML list - "key: {", // Incomplete YAML map - "key", // Missing colon or not a map - "key value", // Missing colon + "{", // Incomplete JSON/YAML object + "key: [", // Incomplete YAML list + "key: {", // Incomplete YAML map + "key", // Missing colon or not a map + "key value", // Missing colon ]; for input in test_cases { let result = check_var(input); - assert!(result.is_err(), "Should have failed for incomplete structure: {}", input); + assert!( + result.is_err(), + "Should have failed for incomplete structure: {}", + input + ); let error_message = result.unwrap_err(); assert!(error_message.contains("YAML parsing error:")); assert!(error_message.contains("JSON parsing error:")); From b806c70ec23f172a7b42716167e867a2bd625514 Mon Sep 17 00:00:00 2001 From: Yu Ishikawa Date: Tue, 29 Jul 2025 10:00:36 +0900 Subject: [PATCH 5/6] Move the comment Signed-off-by: Yu Ishikawa --- crates/dbt-common/src/io_args.rs | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/crates/dbt-common/src/io_args.rs b/crates/dbt-common/src/io_args.rs index 825010f0..9c11307a 100644 --- a/crates/dbt-common/src/io_args.rs +++ b/crates/dbt-common/src/io_args.rs @@ -609,9 +609,8 @@ pub fn check_var(vars: &str) -> Result, String> { // Try parsing as YAML first match dbt_serde_yaml::from_str::>(vars) { Ok(btree) => { - // Check for flow-style YAML without space after colon, e.g., '{key:value}' - // This is an invalid YAML syntax according to the spec but might be leniently parsed by some libraries. - // We want to explicitly disallow it to prevent incorrect interpretations (like 'value' being null). + // Disallow the '{key:value}' format for flow-style YAML syntax + // to prevent key:value: None interpretation: https://stackoverflow.com/a/70909331 if vars.starts_with('{') && vars.contains(':') { let potential_pairs: Vec<&str> = vars .trim_matches('{') @@ -631,8 +630,6 @@ pub fn check_var(vars: &str) -> Result, String> { } } } - // Disallow the '{key:value}' format for flow-style YAML syntax - // to prevent key:value: None interpretation: https://stackoverflow.com/a/70909331 Ok(btree) } Err(yaml_err) => { From d01a93bcb66597a0ab42ad5529d4861094a3f255 Mon Sep 17 00:00:00 2001 From: Yu Ishikawa Date: Tue, 29 Jul 2025 10:00:48 +0900 Subject: [PATCH 6/6] Modify Cargo.toml files Signed-off-by: Yu Ishikawa --- crates/dbt-common/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/dbt-common/Cargo.toml b/crates/dbt-common/Cargo.toml index d66218ca..f0c87f1c 100644 --- a/crates/dbt-common/Cargo.toml +++ b/crates/dbt-common/Cargo.toml @@ -43,7 +43,7 @@ schemars = { workspace = true } serde = { workspace = true, features = ["derive"] } serde_json = { workspace = true } stringmetrics = { workspace = true } -strum = { workspace = true, features = ["derive"] } +strum = { workspace = true } strum_macros = { workspace = true } term_size = { workspace = true }