
Commit ee28c72

Authored and committed by Googler
feat(components): migrate function_based resolve_num_microbatches to rlhf_preprocessor component
PiperOrigin-RevId: 628226399
1 parent 788531b commit ee28c72


5 files changed: +13 -9 lines


components/google-cloud/google_cloud_pipeline_components/_implementation/llm/generated/refined_image_versions.py

Lines changed: 1 addition & 1 deletion
@@ -17,4 +17,4 @@
 DO NOT EDIT - This file is generated, manual changes will be overridden.
 """

-IMAGE_TAG = '20240423_1336'
+IMAGE_TAG = '20240425_1027_RC00'

components/google-cloud/google_cloud_pipeline_components/_implementation/llm/reinforcement_learning_graph.py

Lines changed: 2 additions & 4 deletions
@@ -62,6 +62,7 @@ def pipeline(
     location: str = _placeholders.LOCATION_PLACEHOLDER,
     tensorboard_resource_id: str = '',
     encryption_spec_key_name: str = '',
+    num_microbatches: int = 0,
 ) -> PipelineOutput:
   # fmt: off
   """Trains a reward model.
@@ -122,9 +123,6 @@ def pipeline(
       .set_display_name('Import Prompt Dataset')
       .set_caching_options(False)
   )
-  num_microbatches = function_based.resolve_num_microbatches(
-      large_model_reference=policy_model_reference,
-  ).set_display_name('Resolve Number of Microbatches')
   rl_model = (
       reinforcer.reinforcer(
           project=project,
@@ -150,7 +148,7 @@
           kl_coeff=kl_coeff,
           lora_dim=lora_dim,
           reward_lora_dim=reward_lora_dim,
-          num_microbatches=num_microbatches.output,
+          num_microbatches=num_microbatches,
           encryption_spec_key_name=encryption_spec_key_name,
           tensorboard_resource_id=tensorboard_resource_id,
       )
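
Both tuning graphs now take num_microbatches as a plain int pipeline parameter and hand it straight to their trainer component, instead of creating a function_based.resolve_num_microbatches task inside the graph and reading its .output. A minimal sketch of the new wiring, with hypothetical stub components and assuming the KFP v2 SDK (kfp) is installed; it illustrates the parameter flow only, not the actual GCPC pipeline:

from kfp import dsl


@dsl.component
def trainer_stub(num_microbatches: int):
  # Stand-in for reinforcer.reinforcer / reward_model_trainer in the real graphs.
  print(f'num_microbatches={num_microbatches}')


@dsl.pipeline(name='tuning-graph-sketch')
def tuning_graph_sketch(num_microbatches: int = 0):
  # The value arrives as a pipeline parameter, so the graph no longer needs a
  # resolver task or the .output indirection.
  trainer_stub(num_microbatches=num_microbatches)

The reward_model_graph.py change below follows the same pattern for reward_model_trainer.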

components/google-cloud/google_cloud_pipeline_components/_implementation/llm/reward_model_graph.py

Lines changed: 3 additions & 4 deletions
@@ -57,6 +57,7 @@ def pipeline(
     location: str = _placeholders.LOCATION_PLACEHOLDER,
     tensorboard_resource_id: str = '',
     encryption_spec_key_name: str = '',
+    num_microbatches: int = 0,
 ) -> PipelineOutput:
   # fmt: off
   """Trains a reward model.
@@ -82,6 +83,7 @@ def pipeline(
     location: Location used to run non-tuning components, i.e. components that do not require accelerators. If not specified the location used to run the pipeline will be used.
     tensorboard_resource_id: Optional tensorboard resource id in format `projects/{project_number}/locations/{location}/tensorboards/{tensorboard_id}`. If provided, tensorboard metrics will be uploaded to this location.
     encryption_spec_key_name: Customer-managed encryption key. If this is set, then all resources created by the CustomJob will be encrypted with the provided encryption key. Note that this is not supported for TPU at the moment.
+    num_microbatches: The number of microbatches to break the total batch size into during training.

   Returns:
     reward_model_adapter_path: Path to the output LoRA adapter.
@@ -140,9 +142,6 @@ def pipeline(
       .set_caching_options(False)
   )

-  num_microbatches = function_based.resolve_num_microbatches(
-      large_model_reference=reward_model_reference,
-  ).set_display_name('Resolve Number of Microbatches')
   reward_model = (
       reward_model_trainer.reward_model_trainer(
           project=project,
@@ -165,7 +164,7 @@ def pipeline(
           batch_size=batch_size,
           learning_rate_multiplier=reward_model_learning_rate_multiplier,
           lora_dim=lora_dim,
-          num_microbatches=num_microbatches.output,
+          num_microbatches=num_microbatches,
           encryption_spec_key_name=encryption_spec_key_name,
           tensorboard_resource_id=tensorboard_resource_id,
       )

components/google-cloud/google_cloud_pipeline_components/_implementation/llm/rlhf_preprocessor.py

Lines changed: 4 additions & 0 deletions
@@ -42,6 +42,7 @@ def rlhf_preprocessor(
     metadata_accelerator_type: dsl.OutputPath(str),  # pytype: disable=invalid-annotation
     metadata_accelerator_count: dsl.OutputPath(int),  # pytype: disable=invalid-annotation
     metadata_refined_image_uri: dsl.OutputPath(str),  # pytype: disable=invalid-annotation
+    metadata_num_microbatches: dsl.OutputPath(int),  # pytype: disable=invalid-annotation
     use_experimental_image: bool = False,
     evaluation_dataset: str = '',
     tensorboard_resource_id: str = '',
@@ -77,6 +78,8 @@ def rlhf_preprocessor(
     metadata_accelerator_type: Specific accelerator type for the custom job.
     metadata_accelerator_count: The number of accelerator.
     metadata_refined_image_uri: Docker image URI to use for the custom job.
+    metadata_num_microbatches: Number of microbatches to break the total batch
+      size into during training.
   """
   # fmt: on
   return gcpc_utils.build_serverless_customjob_container_spec(
@@ -110,6 +113,7 @@ def rlhf_preprocessor(
           f'--metadata_accelerator_type_path={metadata_accelerator_type}',
           f'--metadata_accelerator_count_path={metadata_accelerator_count}',
           f'--metadata_refined_image_uri_path={metadata_refined_image_uri}',
+          f'--metadata_num_microbatches_path={metadata_num_microbatches}',
         ],
     ),
     gcp_resources=gcp_resources,
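
Inside the preprocessor, metadata_num_microbatches is declared as dsl.OutputPath(int): at runtime KFP allocates a file path, that path is forwarded to the container through the --metadata_num_microbatches_path flag, the preprocessor writes the resolved value to the file, and the value then appears as a named output on the task. A minimal sketch of the OutputPath pattern with a hypothetical component, assuming the KFP v2 SDK is installed; the echoed 0 is a placeholder, not the real resolution logic:

from kfp import dsl


@dsl.container_component
def preprocessor_sketch(metadata_num_microbatches: dsl.OutputPath(int)):
  # The parameter resolves to a file path; whatever the container writes
  # there becomes the task output named 'metadata_num_microbatches'.
  return dsl.ContainerSpec(
      image='python:3.10-slim',
      command=['sh', '-c'],
      args=[
          f'mkdir -p "$(dirname {metadata_num_microbatches})" && '
          f'echo 0 > {metadata_num_microbatches}'
      ],
  )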

components/google-cloud/google_cloud_pipeline_components/preview/llm/rlhf/component.py

Lines changed: 3 additions & 0 deletions
@@ -107,6 +107,7 @@ def rlhf_pipeline(
       evaluation_dataset=eval_dataset,
       tensorboard_resource_id=tensorboard_resource_id,
   ).set_display_name('Preprocess Inputs')
+  num_microbatches = preprocess_metadata.outputs['metadata_num_microbatches']

   reward_model_pipeline = (
       (
@@ -145,6 +146,7 @@ def rlhf_pipeline(
           location=location,
           tensorboard_resource_id=tensorboard_resource_id,
           encryption_spec_key_name=encryption_spec_key_name,
+          num_microbatches=num_microbatches,
       )
   )
   .set_display_name('Train Reward Model')
@@ -189,6 +191,7 @@ def rlhf_pipeline(
           location=location,
           tensorboard_resource_id=tensorboard_resource_id,
           encryption_spec_key_name=encryption_spec_key_name,
+          num_microbatches=num_microbatches,
       ).set_display_name('Reinforcement Learning')

   has_inference_dataset = preprocess_metadata.outputs['has_inference_dataset']
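
At the top level, the value is read once from the preprocessor task's outputs and fanned out to both sub-pipelines. A minimal sketch of that fan-out with hypothetical stub components, assuming the KFP v2 SDK is installed; the real pipeline uses preprocess_metadata.outputs['metadata_num_microbatches'] because the preprocessor has several named outputs, while the single-output stub below uses .output:

from kfp import dsl


@dsl.component
def preprocess_stub() -> int:
  # Stand-in for rlhf_preprocessor's metadata_num_microbatches output.
  return 0


@dsl.component
def train_stub(num_microbatches: int):
  print(f'num_microbatches={num_microbatches}')


@dsl.pipeline(name='rlhf-fanout-sketch')
def rlhf_fanout_sketch():
  preprocess_metadata = preprocess_stub()
  num_microbatches = preprocess_metadata.output
  # Both tuning stages receive the same resolved value as a parameter.
  train_stub(num_microbatches=num_microbatches).set_display_name(
      'Train Reward Model'
  )
  train_stub(num_microbatches=num_microbatches).set_display_name(
      'Reinforcement Learning'
  )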
