|
14 | 14 | import os |
15 | 15 | import yaml |
16 | 16 | from yaml.loader import SafeLoader |
17 | | -from typing import Optional, List |
18 | | - |
| 17 | +from typing import Optional |
19 | 18 | from hyperpod_cli.clients.kubernetes_client import KubernetesClient |
20 | 19 | from hyperpod_cli.constants.command_constants import ( |
21 | 20 | KUEUE_WORKLOAD_PRIORITY_CLASS_LABEL_KEY, |
|
34 | 33 | from hyperpod_cli.validators.validator import ( |
35 | 34 | Validator, |
36 | 35 | ) |
| 36 | +from hyperpod_cli.validators.recipe_models import HfRecipeSchema, NovaRecipeSchema, NeuronHfRecipeSchema, NovaEvaluationRecipeSchema |
37 | 37 |
|
38 | 38 | logger = setup_logger(__name__) |
39 | 39 |
|
@@ -293,14 +293,56 @@ def validate_scheduler_related_fields( |
293 | 293 | return True |
294 | 294 |
|
def validate_recipe_file(recipe: str):
    """Check that a recipe exists under RECIPES_DIR and matches a known schema.

    The recipe is loaded from ``<RECIPES_DIR>/<recipe>.yaml`` and its top-level
    mapping is passed as keyword arguments to one of the recipe schema classes.
    Constructing the schema is expected to raise when the recipe does not
    conform (presumably pydantic-style validation -- confirm in
    ``hyperpod_cli.validators.recipe_models``).

    Schema selection:
      * ``run.model_type == "hf"``            -> HfRecipeSchema
      * ``run.model_type == "neuron-hf"``     -> NeuronHfRecipeSchema
      * ``"nova"`` in ``run.model_type``      -> NovaEvaluationRecipeSchema when
        the recipe has a top-level ``evaluation`` key, else NovaRecipeSchema
      * no ``run.model_type`` at all          -> try HfRecipeSchema, then
        NovaRecipeSchema, and accept the first that validates

    Args:
        recipe: Recipe name relative to RECIPES_DIR, without the ``.yaml``
            extension.

    Returns:
        True if the recipe file exists and validates against a schema,
        False otherwise. Every failure is logged; no exception propagates.
    """
    recipe_path = os.path.join(RECIPES_DIR, f"{recipe}.yaml")

    # isfile() is False both for a missing path and for a directory that
    # happens to be named "<recipe>.yaml".
    if not os.path.isfile(recipe_path):
        logger.error(f"Recipe file not found: {recipe_path}")
        return False

    # validate yaml field names
    try:
        with open(recipe_path, "r", encoding="utf-8") as f:
            recipe_data = yaml.safe_load(f)

        # An empty file loads as None and a scalar/list document cannot be
        # expanded into schema keyword arguments; report that explicitly
        # instead of surfacing an opaque TypeError.
        if not isinstance(recipe_data, dict):
            logger.error(
                f"Error validating recipe: expected a YAML mapping at the top level of {recipe_path}"
            )
            return False

        run_section = recipe_data.get("run")
        if isinstance(run_section, dict) and "model_type" in run_section:
            model_type = run_section["model_type"]

            if model_type == "hf":
                HfRecipeSchema(**recipe_data)
            elif model_type == "neuron-hf":
                NeuronHfRecipeSchema(**recipe_data)
            elif isinstance(model_type, str) and "nova" in model_type:
                # Nova evaluation recipes are told apart from Nova training
                # recipes by the presence of a top-level "evaluation" section.
                if "evaluation" in recipe_data:
                    NovaEvaluationRecipeSchema(**recipe_data)
                else:
                    NovaRecipeSchema(**recipe_data)
            else:
                # Caught by the generic handler below, which logs the message
                # and returns False.
                raise ValueError(
                    f"Unsupported model_type {model_type}. Make sure the recipe exists in src/hyperpod_cli/sagemaker_hyperpod_recipes/recipes_collection/recipes"
                )
            return True

        # there are 3 yaml without model_type:
        #   HfRecipeSchema   - recipes/training/llama/megatron_llama3_1_8b_nemo.yaml
        #   NovaRecipeSchema - recipes/fine-tuning/nova/nova_premier_r5_cpu_distill.yaml
        #                      recipes/fine-tuning/nova/nova_pro_r5_cpu_distill.yaml
        for schema in (HfRecipeSchema, NovaRecipeSchema):
            try:
                schema(**recipe_data)
                return True
            except Exception as e:
                # A mismatch here is expected while probing; keep the reason
                # available at debug level rather than discarding it.
                logger.debug(f"Recipe does not match {schema.__name__}: {e}")

        logger.error("Cannot validate recipe with existing templates. Make sure you are using correct recipe file in src/hyperpod_cli/sagemaker_hyperpod_recipes/recipes_collection/recipes.")
        return False
    except yaml.YAMLError as e:
        logger.error(f"Invalid YAML in recipe file: {e}")
        return False
    except Exception as e:
        logger.error(f"Error validating recipe: {e}")
        return False
304 | 346 |
|
305 | 347 | def is_dict_str_list_str(data: dict) -> bool: |
306 | 348 | """ |
|
0 commit comments