Skip to content

Commit 359d9e4

Browse files
committed
fix(analyzer): add E660 type check for non-String Embed() arguments
1 parent 15fe348 commit 359d9e4

File tree

5 files changed

+137
-7
lines changed

5 files changed

+137
-7
lines changed

helix-db/src/helixc/analyzer/error_codes.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,8 @@ pub enum ErrorCode {
138138
E658,
139139
/// `E659` - `WHERE clause expression does not evaluate to a boolean`
140140
E659,
141+
/// `E660` - `Embed() argument must be a String`
142+
E660,
141143

142144
/// `W101` - `query has no return`
143145
W101,
@@ -215,6 +217,7 @@ impl ErrorCode {
215217
ErrorCode::E657 => "step requires a previous step",
216218
ErrorCode::E658 => "field not found in object type",
217219
ErrorCode::E659 => "WHERE clause expression is not a boolean",
220+
ErrorCode::E660 => "Embed() argument must be a String",
218221
// Warnings
219222
ErrorCode::W101 => "query has no return",
220223
}
@@ -284,6 +287,7 @@ impl std::fmt::Display for ErrorCode {
284287
ErrorCode::E657 => write!(f, "E657"),
285288
ErrorCode::E658 => write!(f, "E658"),
286289
ErrorCode::E659 => write!(f, "E659"),
290+
ErrorCode::E660 => write!(f, "E660"),
287291
ErrorCode::W101 => write!(f, "W101"),
288292
}
289293
}
@@ -392,6 +396,7 @@ implement_error_code!(E656, "unsupported type conversion from `{}`" => { type_na
392396
implement_error_code!(E657, "step `{}` requires a previous step but none was found" => { step_name }, "ensure this step follows a property access" => {});
393397
implement_error_code!(E658, "field `{}` not found in object type" => { field_name }, "check the field name or use a valid field" => {});
394398
implement_error_code!(E659, "WHERE clause expression should evaluate to a boolean, but got a `{}` traversal" => { expression_type }, "wrap the traversal with `EXISTS(...)` to check if any results exist" => {});
399+
implement_error_code!(E660, "Embed() requires a String argument, but got `{}`" => { actual_type }, "ensure the argument passed to Embed() is of type String" => {});
395400

396401
#[macro_export]
397402
macro_rules! generate_error {

helix-db/src/helixc/analyzer/methods/graph_step_validation.rs

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
//! Semantic analyzer for Helix‑QL.
22
use crate::helixc::analyzer::error_codes::ErrorCode;
3-
use crate::helixc::analyzer::utils::{VariableInfo, type_in_scope};
3+
use crate::helixc::analyzer::utils::{VariableInfo, type_in_scope, validate_embed_string_type};
44
use crate::helixc::generator::traversal_steps::EdgeType;
55
use crate::helixc::generator::utils::EmbedData;
66
use crate::{
@@ -675,10 +675,14 @@ pub(crate) fn apply_graph_step<'a>(
675675
}
676676
Some(VectorData::Embed(e)) => {
677677
let embed_data = match &e.value {
678-
EvaluatesToString::Identifier(i) => EmbedData {
679-
data: gen_identifier_or_param(original_query, i.as_str(), true, false),
680-
model_name: gen_query.embedding_model_to_use.clone(),
681-
},
678+
EvaluatesToString::Identifier(i) => {
679+
type_in_scope(ctx, original_query, sv.loc.clone(), scope, i.as_str());
680+
validate_embed_string_type(ctx, original_query, sv.loc.clone(), scope, i.as_str());
681+
EmbedData {
682+
data: gen_identifier_or_param(original_query, i.as_str(), true, false),
683+
model_name: gen_query.embedding_model_to_use.clone(),
684+
}
685+
}
682686
EvaluatesToString::StringLiteral(s) => EmbedData {
683687
data: GeneratedValue::Literal(GenRef::Ref(s.clone())),
684688
model_name: gen_query.embedding_model_to_use.clone(),

helix-db/src/helixc/analyzer/methods/infer_expr_type.rs

Lines changed: 84 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
//! Semantic analyzer for Helix‑QL.
22
use crate::helixc::analyzer::error_codes::ErrorCode;
33
use crate::helixc::analyzer::utils::{
4-
DEFAULT_VAR_NAME, VariableInfo, is_in_scope, is_param, validate_id_type,
4+
DEFAULT_VAR_NAME, VariableInfo, is_in_scope, is_param, validate_embed_string_type,
5+
validate_id_type,
56
};
67
use crate::helixc::generator::utils::EmbedData;
78
use crate::{
@@ -1073,6 +1074,7 @@ pub(crate) fn infer_expr_type<'a>(
10731074
scope,
10741075
i.as_str(),
10751076
);
1077+
validate_embed_string_type(ctx, original_query, add.loc.clone(), scope, i.as_str());
10761078
EmbedData {
10771079
data: gen_identifier_or_param(
10781080
original_query,
@@ -1180,6 +1182,7 @@ pub(crate) fn infer_expr_type<'a>(
11801182
let embed_data = match &e.value {
11811183
EvaluatesToString::Identifier(i) => {
11821184
type_in_scope(ctx, original_query, sv.loc.clone(), scope, i.as_str());
1185+
validate_embed_string_type(ctx, original_query, sv.loc.clone(), scope, i.as_str());
11831186
EmbedData {
11841187
data: gen_identifier_or_param(
11851188
original_query,
@@ -2021,4 +2024,84 @@ mod tests {
20212024
let (diagnostics, _) = result.unwrap();
20222025
assert!(diagnostics.iter().any(|d| d.error_code == ErrorCode::E659));
20232026
}
2027+
2028+
// ============================================================================
2029+
// Embed() String Type Check Tests (E660)
2030+
// ============================================================================
2031+
2032+
/// Negative control for E660: a query parameter declared as `String` is passed to
/// `Embed()` inside `AddV`, and no `E660` diagnostic may be reported.
#[test]
2033+
fn test_add_vector_embed_with_string_param_valid() {
2034+
let source = r#"
2035+
V::Document { content: String }
2036+
2037+
QUERY test(text: String) =>
2038+
doc <- AddV<Document>(Embed(text), {content: text})
2039+
RETURN doc
2040+
"#;
2041+
2042+
let content = write_to_temp_file(vec![source]);
2043+
let parsed = HelixParser::parse_source(&content).unwrap();
2044+
let result = crate::helixc::analyzer::analyze(&parsed);
2045+
2046+
// `analyze` must return `Ok`; diagnostics are inspected from the returned tuple.
assert!(result.is_ok());
2047+
let (diagnostics, _) = result.unwrap();
2048+
// No diagnostic in the list may carry the E660 code.
assert!(!diagnostics.iter().any(|d| d.error_code == ErrorCode::E660));
2049+
}
2050+
2051+
/// Negative control for E660: `Embed()` receives a string literal (not an identifier)
/// inside `AddV`, and no `E660` diagnostic may be reported.
#[test]
2052+
fn test_add_vector_embed_with_string_literal_valid() {
2053+
let source = r#"
2054+
V::Document { content: String }
2055+
2056+
QUERY test() =>
2057+
doc <- AddV<Document>(Embed("hello world"), {content: "hello world"})
2058+
RETURN doc
2059+
"#;
2060+
2061+
let content = write_to_temp_file(vec![source]);
2062+
let parsed = HelixParser::parse_source(&content).unwrap();
2063+
let result = crate::helixc::analyzer::analyze(&parsed);
2064+
2065+
// `analyze` must return `Ok`; diagnostics are inspected from the returned tuple.
assert!(result.is_ok());
2066+
let (diagnostics, _) = result.unwrap();
2067+
// No diagnostic in the list may carry the E660 code.
assert!(!diagnostics.iter().any(|d| d.error_code == ErrorCode::E660));
2068+
}
2069+
2070+
/// Positive case for E660 via `AddV`: the query parameter `num` is declared as `I32`
/// and passed to `Embed()`, so an `E660` diagnostic is expected.
#[test]
2071+
fn test_add_vector_embed_with_non_string_param_emits_e660() {
2072+
let source = r#"
2073+
V::Document { content: String }
2074+
2075+
QUERY test(num: I32) =>
2076+
doc <- AddV<Document>(Embed(num), {content: "test"})
2077+
RETURN doc
2078+
"#;
2079+
2080+
let content = write_to_temp_file(vec![source]);
2081+
let parsed = HelixParser::parse_source(&content).unwrap();
2082+
let result = crate::helixc::analyzer::analyze(&parsed);
2083+
2084+
// The test expects `analyze` to still return `Ok` here: the type error is looked
// for in the returned diagnostics list rather than in an `Err` result.
assert!(result.is_ok());
2085+
let (diagnostics, _) = result.unwrap();
2086+
// At least one diagnostic must carry the E660 code.
assert!(diagnostics.iter().any(|d| d.error_code == ErrorCode::E660));
2087+
}
2088+
2089+
/// Positive case for E660 via `SearchV`: the query parameter `num` is declared as `I32`
/// and passed to `Embed()` as the search vector, so an `E660` diagnostic is expected.
#[test]
2090+
fn test_search_vector_embed_with_non_string_param_emits_e660() {
2091+
let source = r#"
2092+
V::Document { content: String }
2093+
2094+
QUERY test(num: I32) =>
2095+
docs <- SearchV<Document>(Embed(num), 10)
2096+
RETURN docs
2097+
"#;
2098+
2099+
let content = write_to_temp_file(vec![source]);
2100+
let parsed = HelixParser::parse_source(&content).unwrap();
2101+
let result = crate::helixc::analyzer::analyze(&parsed);
2102+
2103+
// The test expects `analyze` to still return `Ok` here: the type error is looked
// for in the returned diagnostics list rather than in an `Err` result.
assert!(result.is_ok());
2104+
let (diagnostics, _) = result.unwrap();
2105+
// At least one diagnostic must carry the E660 code.
assert!(diagnostics.iter().any(|d| d.error_code == ErrorCode::E660));
2106+
}
20242107
}

helix-db/src/helixc/analyzer/methods/traversal_validation.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
use crate::helixc::analyzer::error_codes::*;
22
use crate::helixc::analyzer::utils::{
3-
DEFAULT_VAR_NAME, VariableInfo, check_identifier_is_fieldtype,
3+
DEFAULT_VAR_NAME, VariableInfo, check_identifier_is_fieldtype, validate_embed_string_type,
44
};
55
use crate::helixc::generator::bool_ops::{Contains, IsIn, PropertyEq, PropertyNeq};
66
use crate::helixc::generator::source_steps::{SearchVector, VFromID, VFromType};
@@ -549,6 +549,7 @@ pub(crate) fn validate_traversal<'a>(
549549
let embed_data = match &e.value {
550550
EvaluatesToString::Identifier(i) => {
551551
type_in_scope(ctx, original_query, sv.loc.clone(), scope, i.as_str());
552+
validate_embed_string_type(ctx, original_query, sv.loc.clone(), scope, i.as_str());
552553
EmbedData {
553554
data: gen_identifier_or_param(
554555
original_query,
@@ -2441,6 +2442,7 @@ pub(crate) fn validate_traversal<'a>(
24412442
scope,
24422443
id.as_str(),
24432444
);
2445+
validate_embed_string_type(ctx, original_query, embed.loc.clone(), scope, id.as_str());
24442446
EmbedData {
24452447
data: gen_identifier_or_param(
24462448
original_query,

helix-db/src/helixc/analyzer/utils.rs

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,42 @@ pub(super) fn validate_id_type(
152152
}
153153
}
154154

155+
/// Validates that an identifier used in Embed() is of type String.
///
/// The identifier is resolved in two steps:
///
/// 1. `is_param(original_query, identifier_name)` is consulted first. If it yields a
///    parameter, only that parameter's declared type is checked and the function
///    returns without looking at `scope`.
/// 2. Otherwise `scope` is searched for `identifier_name`.
///
/// When the resolved type is not a String (`FieldType::String` for a parameter,
/// `Type::Scalar(FieldType::String)` for a scope variable), an `E660` error is raised
/// through `generate_error!` at `loc`, with the offending type's `to_string()` form as
/// the message argument.
///
/// An identifier found in neither place produces no diagnostic from this function.
/// NOTE(review): the call sites visible in this change invoke `type_in_scope` just
/// before this function, which presumably reports unknown identifiers — confirm.
156+
pub(super) fn validate_embed_string_type(
157+
ctx: &mut Ctx,
158+
original_query: &Query,
159+
loc: Loc,
160+
scope: &HashMap<&str, VariableInfo>,
161+
identifier_name: &str,
162+
) {
163+
// Check if it's a parameter
164+
if let Some(param) = is_param(original_query, identifier_name) {
165+
if param.param_type.1 != FieldType::String {
166+
generate_error!(
167+
ctx,
168+
original_query,
169+
loc,
170+
E660,
171+
&param.param_type.1.to_string()
172+
);
173+
}
174+
// A parameter match is final: the scope map is not consulted for this name,
// whether or not the parameter's type passed the check.
return;
175+
}
176+
177+
// Check if it's a scope variable
178+
if let Some(var_info) = scope.get(identifier_name) {
179+
if var_info.ty != Type::Scalar(FieldType::String) {
180+
generate_error!(
181+
ctx,
182+
original_query,
183+
loc,
184+
E660,
185+
&var_info.ty.to_string()
186+
);
187+
}
188+
}
189+
}
190+
155191
/// Returns `true` when `name` is a key of the `scope` map, `false` otherwise.
pub(super) fn is_in_scope(scope: &HashMap<&str, VariableInfo>, name: &str) -> bool {
156192
scope.contains_key(name)
157193
}

0 commit comments

Comments
 (0)