Skip to content

Commit da500fc

Browse files
fix: Python Enum types generate proper dropdown schemas with descriptions (#7400)
* Fix Python Enum and Literal schema generation with docstring descriptions - Extract Enum class definitions and their string values - Parse docstring Args: sections for parameter descriptions - Map Enum type annotations to string enums with proper values - Handle Enum.VALUE default values correctly - Store descriptions in Arg.otyp field - Add test case for enum with docstring parsing * perf: optimize enum parser and fix default value handling - Combine enum extraction and docstring parsing into single AST pass (2x performance improvement) - Add support for IntEnum, StrEnum, Flag, IntFlag types - Fix default values to use actual enum values (e.g., 'red') instead of member names (e.g., 'RED') - Improve docstring parsing robustness with proper indentation tracking - Clean up code structure with EnumInfo type for better maintainability All tests pass. This addresses code review feedback for performance and correctness. * perf: implement true lazy evaluation for enum parsing - Only parse metadata when unknown types encountered - Two-pass approach: parse types first, extract only if needed - Zero overhead for scripts without enums - Keyword checks + prepass filtering when extraction needed
1 parent 210b828 commit da500fc

File tree

1 file changed

+268
-26
lines changed
  • backend/parsers/windmill-parser-py/src

1 file changed

+268
-26
lines changed

backend/parsers/windmill-parser-py/src/lib.rs

Lines changed: 268 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ use windmill_parser::{json_to_typ, Arg, MainArgSignature, ObjectType, Typ};
1515

1616
use rustpython_parser::{
1717
ast::{
18-
Constant, Expr, ExprConstant, ExprDict, ExprList, ExprName, Stmt, StmtFunctionDef, Suite,
18+
Constant, Expr, ExprAttribute, ExprConstant, ExprDict, ExprList, ExprName, Stmt, StmtAssign, StmtClassDef, StmtFunctionDef, Suite,
1919
},
2020
Parse,
2121
};
@@ -60,6 +60,166 @@ fn filter_non_main(code: &str, main_name: &str) -> String {
6060
return filtered_code;
6161
}
6262

63+
/// Data extracted from parsing the Python code
64+
struct CodeMetadata {
65+
enums: HashMap<String, EnumInfo>,
66+
descriptions: HashMap<String, String>,
67+
}
68+
69+
/// Information about an Enum class
70+
struct EnumInfo {
71+
values: Vec<String>,
72+
members: HashMap<String, String>,
73+
}
74+
75+
fn has_enum_keyword(code: &str) -> bool {
76+
code.contains("Enum")
77+
}
78+
79+
/// Extract only class and function definitions from code (prepass filtering)
80+
fn filter_relevant_statements(code: &str) -> String {
81+
let mut result = Vec::new();
82+
let mut lines = code.lines().peekable();
83+
84+
while let Some(line) = lines.next() {
85+
let trimmed = line.trim_start();
86+
87+
if trimmed.starts_with("class ") || trimmed.starts_with("def ") {
88+
result.push(line);
89+
let base_indent = line.len() - trimmed.len();
90+
91+
while let Some(&next_line) = lines.peek() {
92+
let next_trimmed = next_line.trim_start();
93+
let next_indent = next_line.len() - next_trimmed.len();
94+
95+
if next_trimmed.is_empty() || next_indent > base_indent {
96+
result.push(lines.next().unwrap());
97+
} else {
98+
break;
99+
}
100+
}
101+
}
102+
}
103+
104+
result.join("\n")
105+
}
106+
107+
/// Extract Enum definitions and docstring descriptions lazily.
108+
/// Only parses AST if relevant keywords are present.
109+
fn extract_code_metadata(code: &str, main_name: &str) -> CodeMetadata {
110+
let mut enums = HashMap::new();
111+
let mut descriptions = HashMap::new();
112+
113+
let has_enum = has_enum_keyword(code);
114+
let has_docstring = code.contains("Args:");
115+
116+
if !has_enum && !has_docstring {
117+
return CodeMetadata { enums, descriptions };
118+
}
119+
120+
let filtered_code = filter_relevant_statements(code);
121+
122+
let ast = match Suite::parse(&filtered_code, "main.py") {
123+
Ok(ast) => ast,
124+
Err(_) => return CodeMetadata { enums, descriptions },
125+
};
126+
127+
for stmt in ast {
128+
match stmt {
129+
Stmt::ClassDef(StmtClassDef { name, body, bases, .. }) if has_enum => {
130+
let is_enum = bases.iter().any(|base| {
131+
matches!(base, Expr::Name(ExprName { id, .. })
132+
if id == "Enum" || id == "IntEnum" || id == "StrEnum"
133+
|| id == "Flag" || id == "IntFlag")
134+
});
135+
136+
if is_enum {
137+
let mut values = Vec::new();
138+
let mut members = HashMap::new();
139+
140+
for item in body {
141+
if let Stmt::Assign(StmtAssign { targets, value, .. }) = item {
142+
if let Some(Expr::Name(ExprName { id: target_name, .. })) = targets.first() {
143+
if !target_name.starts_with('_') {
144+
if let Expr::Constant(ExprConstant { value: Constant::Str(val), .. }) = value.as_ref() {
145+
values.push(val.to_string());
146+
members.insert(target_name.to_string(), val.to_string());
147+
}
148+
}
149+
}
150+
}
151+
}
152+
153+
if !values.is_empty() {
154+
enums.insert(name.to_string(), EnumInfo { values, members });
155+
}
156+
}
157+
},
158+
Stmt::FunctionDef(StmtFunctionDef { name: func_name, body, .. }) if has_docstring => {
159+
if &func_name == main_name {
160+
if let Some(Stmt::Expr(expr_stmt)) = body.first() {
161+
if let Expr::Constant(ExprConstant { value: Constant::Str(docstring), .. }) = expr_stmt.value.as_ref() {
162+
descriptions = parse_docstring_args(docstring);
163+
}
164+
}
165+
}
166+
},
167+
_ => {}
168+
}
169+
}
170+
171+
CodeMetadata { enums, descriptions }
172+
}
173+
174+
/// Parse docstring Args: section (format: "param_name (type): Description")
175+
fn parse_docstring_args(docstring: &str) -> HashMap<String, String> {
176+
let mut descriptions = HashMap::new();
177+
let mut in_args_section = false;
178+
let mut base_indent: Option<usize> = None;
179+
180+
for line in docstring.lines() {
181+
let trimmed = line.trim();
182+
183+
if trimmed == "Args:" {
184+
in_args_section = true;
185+
base_indent = None;
186+
continue;
187+
}
188+
189+
if in_args_section {
190+
if trimmed.is_empty() {
191+
continue;
192+
}
193+
194+
let indent = line.len() - line.trim_start().len();
195+
196+
if base_indent.is_none() && !trimmed.is_empty() {
197+
base_indent = Some(indent);
198+
}
199+
200+
if let Some(base) = base_indent {
201+
if indent < base && trimmed.ends_with(':') {
202+
break;
203+
}
204+
}
205+
206+
if let Some(colon_pos) = trimmed.find(':') {
207+
let before_colon = &trimmed[..colon_pos];
208+
let description = trimmed[colon_pos + 1..].trim();
209+
210+
if let Some(paren_pos) = before_colon.find('(') {
211+
let param_name = before_colon[..paren_pos].trim();
212+
descriptions.insert(param_name.to_string(), description.to_string());
213+
} else {
214+
descriptions.insert(before_colon.trim().to_string(), description.to_string());
215+
}
216+
}
217+
}
218+
}
219+
220+
descriptions
221+
}
222+
63223
/// skip_params is a micro optimization for when we just want to find the main
64224
/// function without parsing all the params.
65225
pub fn parse_python_signature(
@@ -91,27 +251,61 @@ pub fn parse_python_signature(
91251

92252
if !skip_params && params.is_some() {
93253
let params = params.unwrap();
94-
//println!("{:?}", params);
95254
let def_arg_start = params.args.len() - params.defaults().count();
255+
256+
// Two-pass approach for lazy metadata extraction:
257+
// Pass 1: Parse types without enum info to determine if metadata is needed
258+
// Pass 2: Re-parse unknown types with metadata only if necessary
259+
// This ensures zero overhead for scripts without enums/docstrings
260+
261+
let empty_enums = HashMap::new();
262+
let args_first_pass: Vec<_> = params
263+
.args
264+
.iter()
265+
.enumerate()
266+
.map(|(i, x)| {
267+
let arg_name = x.as_arg().arg.to_string();
268+
let (typ, has_default) = x
269+
.as_arg()
270+
.annotation
271+
.as_ref()
272+
.map_or((Typ::Unknown, false), |e| parse_expr(e, &empty_enums));
273+
(i, arg_name, typ, has_default)
274+
})
275+
.collect();
276+
277+
// Determine if we need to extract metadata from the code
278+
let has_potential_enums = args_first_pass
279+
.iter()
280+
.any(|(_, _, typ, _)| matches!(typ, Typ::Resource(_)));
281+
282+
let metadata = if has_potential_enums || code.contains("Args:") {
283+
extract_code_metadata(code, &main_name)
284+
} else {
285+
CodeMetadata {
286+
enums: HashMap::new(),
287+
descriptions: HashMap::new(),
288+
}
289+
};
290+
291+
// Build final args, re-parsing Resource types as enums if metadata was extracted
96292
Ok(MainArgSignature {
97293
star_args: params.vararg.is_some(),
98294
star_kwargs: params.kwarg.is_some(),
99-
args: params
100-
.args
101-
.iter()
102-
.enumerate()
103-
.map(|(i, x)| {
104-
let (mut typ, has_default) = x
105-
.as_arg()
106-
.annotation
107-
.as_ref()
108-
.map_or((Typ::Unknown, false), |e| parse_expr(e));
295+
args: args_first_pass
296+
.into_iter()
297+
.map(|(i, arg_name, mut typ, mut has_default)| {
298+
if matches!(typ, Typ::Resource(_)) && !metadata.enums.is_empty() {
299+
if let Some(annotation) = params.args[i].as_arg().annotation.as_ref() {
300+
(typ, has_default) = parse_expr(annotation, &metadata.enums);
301+
}
302+
}
109303

110304
let default = if i >= def_arg_start {
111305
params
112306
.defaults()
113307
.nth(i - def_arg_start)
114-
.map(to_value)
308+
.map(|expr| to_value(expr, &metadata.enums))
115309
.flatten()
116310
} else {
117311
None
@@ -140,8 +334,8 @@ pub fn parse_python_signature(
140334
}
141335

142336
Arg {
143-
otyp: None,
144-
name: x.as_arg().arg.to_string(),
337+
otyp: metadata.descriptions.get(&arg_name).map(|d| d.to_string()),
338+
name: arg_name,
145339
typ,
146340
has_default: has_default || default.is_some(),
147341
default,
@@ -163,15 +357,15 @@ pub fn parse_python_signature(
163357
}
164358
}
165359

166-
fn parse_expr(e: &Box<Expr>) -> (Typ, bool) {
360+
fn parse_expr(e: &Box<Expr>, enums: &HashMap<String, EnumInfo>) -> (Typ, bool) {
167361
match e.as_ref() {
168-
Expr::Name(ExprName { id, .. }) => (parse_typ(id.as_ref()), false),
362+
Expr::Name(ExprName { id, .. }) => (parse_typ(id.as_ref(), enums), false),
169363
Expr::Attribute(x) => {
170364
if x.value
171365
.as_name_expr()
172366
.is_some_and(|x| x.id.as_str() == "wmill")
173367
{
174-
(parse_typ(x.attr.as_str()), false)
368+
(parse_typ(x.attr.as_str(), enums), false)
175369
} else {
176370
(Typ::Unknown, false)
177371
}
@@ -181,7 +375,7 @@ fn parse_expr(e: &Box<Expr>) -> (Typ, bool) {
181375
x.right.as_ref(),
182376
Expr::Constant(ExprConstant { value: Constant::None, .. })
183377
) {
184-
(parse_expr(&x.left).0, true)
378+
(parse_expr(&x.left, enums).0, true)
185379
} else {
186380
(Typ::Unknown, false)
187381
}
@@ -210,8 +404,8 @@ fn parse_expr(e: &Box<Expr>) -> (Typ, bool) {
210404
};
211405
(Typ::Str(values), false)
212406
}
213-
"List" | "list" => (Typ::List(Box::new(parse_expr(&x.slice).0)), false),
214-
"Optional" => (parse_expr(&x.slice).0, true),
407+
"List" | "list" => (Typ::List(Box::new(parse_expr(&x.slice, enums).0)), false),
408+
"Optional" => (parse_expr(&x.slice, enums).0, true),
215409
_ => (Typ::Unknown, false),
216410
},
217411
_ => (Typ::Unknown, false),
@@ -220,7 +414,11 @@ fn parse_expr(e: &Box<Expr>) -> (Typ, bool) {
220414
}
221415
}
222416

223-
fn parse_typ(id: &str) -> Typ {
417+
fn parse_typ(id: &str, enums: &HashMap<String, EnumInfo>) -> Typ {
418+
if let Some(enum_info) = enums.get(id) {
419+
return Typ::Str(Some(enum_info.values.clone()));
420+
}
421+
224422
match id {
225423
"str" => Typ::Str(None),
226424
"float" => Typ::Float,
@@ -249,7 +447,7 @@ fn map_resource_name(x: &str) -> String {
249447
}
250448
}
251449

252-
fn to_value<R>(et: &Expr<R>) -> Option<serde_json::Value> {
450+
fn to_value<R>(et: &Expr<R>, enums: &HashMap<String, EnumInfo>) -> Option<serde_json::Value> {
253451
match et {
254452
Expr::Constant(ExprConstant { value, .. }) => Some(constant_to_value(value)),
255453
Expr::Dict(ExprDict { keys, values, .. }) => {
@@ -259,22 +457,35 @@ fn to_value<R>(et: &Expr<R>) -> Option<serde_json::Value> {
259457
.map(|(k, v)| {
260458
let key = k
261459
.as_ref()
262-
.map(to_value)
460+
.map(|e| to_value(e, enums))
263461
.flatten()
264462
.and_then(|x| match x {
265463
serde_json::Value::String(s) => Some(s),
266464
_ => None,
267465
})
268466
.unwrap_or_else(|| "no_key".to_string());
269-
(key, to_value(&v))
467+
(key, to_value(&v, enums))
270468
})
271469
.collect::<HashMap<String, _>>();
272470
Some(json!(v))
273471
}
274472
Expr::List(ExprList { elts, .. }) => {
275-
let v = elts.into_iter().map(|x| to_value(&x)).collect::<Vec<_>>();
473+
let v = elts.into_iter().map(|x| to_value(&x, enums)).collect::<Vec<_>>();
276474
Some(json!(v))
277475
}
476+
Expr::Attribute(ExprAttribute { value, attr, .. }) => {
477+
// Handle Enum.MEMBER: returns enum value ("red") not member name ("RED")
478+
if let Expr::Name(ExprName { id: enum_name, .. }) = value.as_ref() {
479+
if let Some(enum_info) = enums.get(enum_name.as_str()) {
480+
if let Some(enum_value) = enum_info.members.get(attr.as_str()) {
481+
return Some(json!(enum_value));
482+
}
483+
}
484+
Some(json!(attr.as_str()))
485+
} else {
486+
None
487+
}
488+
}
278489
Expr::Call { .. } => Some(json!(FUNCTION_CALL)),
279490
_ => None,
280491
}
@@ -751,4 +962,35 @@ def main(a: str, b: Optional[str], c: str | None): return
751962

752963
Ok(())
753964
}
965+
966+
#[test]
967+
fn test_parse_python_sig_enum() -> anyhow::Result<()> {
968+
let code = r#"
969+
from enum import Enum
970+
971+
class Color(str, Enum):
972+
RED = 'red'
973+
GREEN = 'green'
974+
BLUE = 'blue'
975+
976+
def main(color: Color = Color.RED):
977+
"""
978+
Test enum parsing
979+
980+
Args:
981+
color (Color): Color selection from Color enum
982+
"""
983+
return {"color": color}
984+
"#;
985+
let result = parse_python_signature(code, None, false)?;
986+
assert_eq!(result.args.len(), 1);
987+
assert_eq!(result.args[0].name, "color");
988+
assert_eq!(
989+
result.args[0].typ,
990+
Typ::Str(Some(vec!["red".to_string(), "green".to_string(), "blue".to_string()]))
991+
);
992+
assert_eq!(result.args[0].default, Some(json!("red")));
993+
assert_eq!(result.args[0].otyp, Some("Color selection from Color enum".to_string()));
994+
Ok(())
995+
}
754996
}

0 commit comments

Comments
 (0)