Skip to content

Commit 520886b

Browse files
authored
Add recipe validation for field names and format (#226)
* Add recipe validation for field names and format. Tested by running `hyperpod start-job --recipe <file-path>` for all 122 recipe files; no validation errors were reported. * Update error message
1 parent c20815e commit 520886b

File tree

10 files changed

+810
-10
lines changed

10 files changed

+810
-10
lines changed

src/hyperpod_cli/validators/job_validator.py

Lines changed: 52 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,7 @@
1414
import os
1515
import yaml
1616
from yaml.loader import SafeLoader
17-
from typing import Optional, List
18-
17+
from typing import Optional
1918
from hyperpod_cli.clients.kubernetes_client import KubernetesClient
2019
from hyperpod_cli.constants.command_constants import (
2120
KUEUE_WORKLOAD_PRIORITY_CLASS_LABEL_KEY,
@@ -34,6 +33,7 @@
3433
from hyperpod_cli.validators.validator import (
3534
Validator,
3635
)
36+
from hyperpod_cli.validators.recipe_models import HfRecipeSchema, NovaRecipeSchema, NeuronHfRecipeSchema, NovaEvaluationRecipeSchema
3737

3838
logger = setup_logger(__name__)
3939

@@ -293,14 +293,56 @@ def validate_scheduler_related_fields(
293293
return True
294294

295295
def validate_recipe_file(recipe: str):
    """
    Check that a recipe file exists and matches one of the known recipe schemas.

    The recipe is looked up as ``<RECIPES_DIR>/<recipe>.yaml``, parsed with
    ``yaml.safe_load`` and then passed to a schema class chosen from
    ``run.model_type``:

    * ``"hf"``                          -> ``HfRecipeSchema``
    * ``"neuron-hf"``                   -> ``NeuronHfRecipeSchema``
    * contains ``"nova"`` and the recipe has a top-level ``evaluation`` key
                                        -> ``NovaEvaluationRecipeSchema``
    * contains ``"nova"``               -> ``NovaRecipeSchema``

    Recipes without ``run.model_type`` are tried against ``HfRecipeSchema``
    and then ``NovaRecipeSchema``; the first one that accepts the data wins.

    Args:
        recipe: Recipe name relative to ``RECIPES_DIR``, without the
            ``.yaml`` suffix.

    Returns:
        True if the file exists and a schema accepted it, False otherwise.
        Every failure is logged; no exception is propagated to the caller.
    """
    recipe_path = os.path.join(RECIPES_DIR, f"{recipe}.yaml")

    # isfile() also rejects a directory that happens to be named "<recipe>.yaml".
    if not os.path.isfile(recipe_path):
        logger.error(f"Recipe file not found: {recipe_path}")
        return False

    try:
        with open(recipe_path, "r", encoding="utf-8") as f:
            recipe_data = yaml.safe_load(f)
    except yaml.YAMLError as e:
        logger.error(f"Invalid YAML in recipe file: {e}")
        return False
    except Exception as e:
        # I/O or decoding problems while reading the file.
        logger.error(f"Error validating recipe: {e}")
        return False

    # safe_load returns None for an empty document and may return a scalar or
    # list; the schemas are instantiated with **recipe_data, so a mapping is
    # required.
    if not isinstance(recipe_data, dict):
        logger.error(
            "Error validating recipe: expected a YAML mapping at the top level "
            f"of {recipe_path}, got {type(recipe_data).__name__}"
        )
        return False

    run_section = recipe_data.get("run")
    if isinstance(run_section, dict) and "model_type" in run_section:
        model_type = run_section["model_type"]

        if model_type == "hf":
            schema = HfRecipeSchema
        elif model_type == "neuron-hf":
            schema = NeuronHfRecipeSchema
        elif isinstance(model_type, str) and "nova" in model_type:
            # Nova evaluation recipes are told apart by a top-level
            # "evaluation" section.
            if "evaluation" in recipe_data:
                schema = NovaEvaluationRecipeSchema
            else:
                schema = NovaRecipeSchema
        else:
            logger.error(
                f"Error validating recipe: Unsupported model_type {model_type}. "
                "Make sure the recipe exists in "
                "src/hyperpod_cli/sagemaker_hyperpod_recipes/recipes_collection/recipes"
            )
            return False

        try:
            # Constructing the schema performs the validation; any exception
            # it raises is treated as a validation failure.
            schema(**recipe_data)
        except Exception as e:
            logger.error(f"Error validating recipe: {e}")
            return False
        return True

    # A few recipes carry no run.model_type, e.g.
    #   recipes/training/llama/megatron_llama3_1_8b_nemo.yaml    (HfRecipeSchema)
    #   recipes/fine-tuning/nova/nova_premier_r5_cpu_distill.yaml (NovaRecipeSchema)
    #   recipes/fine-tuning/nova/nova_pro_r5_cpu_distill.yaml     (NovaRecipeSchema)
    # so try the candidate schemas in order and accept the first match.
    for schema in (HfRecipeSchema, NovaRecipeSchema):
        try:
            schema(**recipe_data)
        except Exception as e:
            # Keep the reason available for troubleshooting instead of
            # discarding it silently.
            logger.debug(f"Recipe did not match {schema.__name__}: {e}")
        else:
            return True

    logger.error("Cannot validate recipe with existing templates. Make sure you are using correct recipe file in src/hyperpod_cli/sagemaker_hyperpod_recipes/recipes_collection/recipes.")
    return False
304346

305347
def is_dict_str_list_str(data: dict) -> bool:
306348
"""
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
from hyperpod_cli.validators.recipe_models.hf.model import HfRecipeSchema
2+
from hyperpod_cli.validators.recipe_models.neuron_hf.model import NeuronHfRecipeSchema
3+
from hyperpod_cli.validators.recipe_models.nova.model import NovaRecipeSchema
4+
from hyperpod_cli.validators.recipe_models.nova_evaluation.model import NovaEvaluationRecipeSchema

src/hyperpod_cli/validators/recipe_models/hf/__init__.py

Whitespace-only changes.

0 commit comments

Comments
 (0)