Merge pull request #2722 from AI-Hypercomputer:pydantic_fix

Google-ML-Automation · Google-ML-Automation · commit acd9e078d24b · 2025-11-20T14:31:20.000-08:00
PiperOrigin-RevId: 834913985
diff --git a/dependencies/dockerfiles/maxtext_post_training_dependencies.Dockerfile b/dependencies/dockerfiles/maxtext_post_training_dependencies.Dockerfile
@@ -30,6 +30,8 @@ RUN pip install numba==0.61.2
 # Install vLLM for Jax and TPUs
 RUN pip install vllm-tpu
 
+RUN pip install --no-deps qwix==0.1.4
+
 RUN if [ "$MODE" = "post-training-experimental" ]; then \
     pip uninstall -y jax jaxlib libtpu && \
     pip install --pre -U jax jaxlib -i https://us-python.pkg.dev/ml-oss-artifacts-published/jax/simple/ && \
diff --git a/dependencies/dockerfiles/maxtext_post_training_local_dependencies.Dockerfile b/dependencies/dockerfiles/maxtext_post_training_local_dependencies.Dockerfile
@@ -28,6 +28,7 @@ RUN pip install keyring keyrings.google-artifactregistry-auth
 RUN pip install numba==0.61.2
 
 COPY tunix /tunix
+RUN pip uninstall -y google-tunix
 RUN pip install -e /tunix --no-cache-dir
 
 
@@ -49,6 +50,7 @@ RUN pip install -e /tpu-inference --no-cache-dir --pre \
     --extra-index-url https://us-python.pkg.dev/ml-oss-artifacts-published/jax/simple/ \
     --find-links https://storage.googleapis.com/jax-releases/libtpu_releases.html
 
+RUN pip install --no-deps qwix==0.1.4
 
 RUN if [ "$MODE" = "post-training-experimental" ]; then \
     echo "MODE=post-training-experimental: Re-installing JAX/libtpu"; \
diff --git a/src/MaxText/configs/rl.yml b/src/MaxText/configs/rl.yml
@@ -97,7 +97,7 @@ train_fraction: 1.0
 
 eval_interval: 10  # this doesn't matter if `TRAIN_FRACTION = 1.0`.
 
-num_epochs: 1  # can potentially train for more epochs
+num_epoch: 1  # can potentially train for more epochs
 
 learning_rate: 3e-6
 adam_b1: 0.9 # Exponential decay rate to track the first moment of past gradients.
diff --git a/src/MaxText/configs/types.py b/src/MaxText/configs/types.py
@@ -19,7 +19,7 @@
 from enum import Enum
 from math import prod
 from tempfile import gettempdir
-from typing import Any, NewType, Literal
+from typing import Any, NewType, Literal, Optional
 import datetime
 import logging
 import math
@@ -837,9 +837,9 @@ class HfDataset(BaseModel):
 
   hf_path: str = Field("", description="Path or name of the Hugging Face dataset.")
   hf_data_dir: PathStr = Field("", description="Data directory for the HF dataset.")
-  hf_train_files: str = Field("", description="Files for the HF training split.")
+  hf_train_files: Optional[str] = Field(None, description="Files for the HF training split.")
   hf_eval_split: str = Field("", description="Name of the HF evaluation split.")
-  hf_eval_files: str = Field("", description="Files for the HF evaluation split.")
+  hf_eval_files: Optional[str] = Field(None, description="Files for the HF evaluation split.")
   hf_access_token: None | str = Field(None, description="Hugging Face API access token.")
 
 
diff --git a/src/MaxText/examples/install_tunix_vllm_requirement.sh b/src/MaxText/examples/install_tunix_vllm_requirement.sh
@@ -19,7 +19,7 @@
 set -e
 set -x
 
-uv pip uninstall -y jax jaxlib libtpu
+uv pip uninstall jax jaxlib libtpu
 
 uv pip install aiohttp==3.12.15
 
@@ -28,6 +28,4 @@ uv pip install vllm-tpu
 
 uv pip install numba==0.61.2
 
-uv pip install qwix==0.1.1
-
-uv pip install flax==0.11.1
+uv pip install --no-deps qwix==0.1.4
diff --git a/src/MaxText/model_creation_utils.py b/src/MaxText/model_creation_utils.py
@@ -117,7 +117,7 @@ def create_model(config, mesh, model_mode: str = MODEL_MODE_TRAIN, rngs: nnx.Rng
   return model
 
 
-def create_nnx_model(config, mesh=None, devices=None, model_mode=None, rng_key=None):
+def create_nnx_model(config, mesh=None, devices=None, model_mode=MODEL_MODE_TRAIN, rng_key=None):
   """Creates a NNX model with sharded parameters, possibly loading from a checkpoint."""
 
   def _create_model(mesh: Mesh | None = None, model_mode: str = MODEL_MODE_TRAIN, rng_key: jax.Array | None = None):
diff --git a/src/MaxText/pyconfig.py b/src/MaxText/pyconfig.py
@@ -97,11 +97,6 @@ def _prepare_for_pydantic(raw_keys: dict[str, Any]) -> dict[str, Any]:
   pydantic_kwargs = {}
   valid_fields = types.MaxTextConfig.model_fields.keys()
 
-  # This is a workaround for tests that use `dataset_type='hf'` but do not
-  # specify `tokenizer_type='huggingface'`, which they should.
-  if raw_keys.get("dataset_type") == "hf" and "tokenizer_type" not in raw_keys:
-    raw_keys["tokenizer_type"] = "huggingface"
-
   for key, value in raw_keys.items():
     if key not in valid_fields:
       logger.warning("Ignoring invalid/unsupported field from YAML/CLI: %s", repr(key))
@@ -119,7 +114,11 @@ def _prepare_for_pydantic(raw_keys: dict[str, Any]) -> dict[str, Any]:
       if key == "data_sharding" and isinstance(new_value, list) and new_value and isinstance(new_value[0], str):
         new_value = [new_value]
 
-    if key in ("run_name", "hf_train_files", "hf_eval_files") and new_value is None:
+    # An empty string for these file fields means "not set"; normalize it to None.
+    if key in ("hf_train_files", "hf_eval_files") and new_value == "":
+      new_value = None
+
+    if key == "run_name" and new_value is None:
       new_value = ""
 
     pydantic_kwargs[key] = new_value
diff --git a/src/MaxText/rl/train_rl.py b/src/MaxText/rl/train_rl.py
@@ -100,7 +100,7 @@ def get_maxtext_model(config, devices=None):
   # Please ensure that you pass the full path ending in `/0/items` for load_parameters_path to train_rl.py i.e.,
   # load_parameters_path=/path/to/your/output/directory/0/items
   """
-  model, mesh = model_creation_utils.create_nnx_model(config, devices)
+  model, mesh = model_creation_utils.create_nnx_model(config, devices=devices)
   with mesh:
     tunix_model = TunixMaxTextAdapter(base_model=model)
     tunix_model.config = None
@@ -238,7 +238,7 @@ def rl_train(trainer_config, sampler_config, trainer_devices, sampler_devices):
       trainer_config.num_batches
       * trainer_config.num_iterations
       * trainer_config.train_fraction
-      * trainer_config.num_epochs
+      * trainer_config.num_epoch
   )
 
   # ====== Data ======
@@ -260,10 +260,10 @@ def rl_train(trainer_config, sampler_config, trainer_devices, sampler_devices):
   )[: trainer_config.num_batches]
 
   if trainer_config.train_fraction == 1.0:
-    train_dataset = dataset.repeat(trainer_config.num_epochs)
+    train_dataset = dataset.repeat(trainer_config.num_epoch)
   else:
     train_dataset = dataset[: int(len(dataset) * trainer_config.train_fraction)]
-    train_dataset = train_dataset.repeat(trainer_config.num_epochs)
+    train_dataset = train_dataset.repeat(trainer_config.num_epoch)
 
   test_dataset = get_dataset(model_tokenizer, trainer_config, test_data_dir, trainer_config.eval_split).batch(
       trainer_config.batch_size
@@ -416,7 +416,7 @@ def rl_train(trainer_config, sampler_config, trainer_devices, sampler_devices):
           lambda **kwargs: utils_rl.check_answer(tmvp_config=trainer_config, **kwargs),
           lambda **kwargs: utils_rl.check_numbers(tmvp_config=trainer_config, **kwargs),
       ],
-      grpo_config=grpo_config,
+      algo_config=grpo_config,
   )
 
   # Before we train the model, let's evaluate the model on the test set so we can