Skip to content

Commit efefe34

Browse files
author
Googler
committed
feat(components): migrate function_based convert_to_delimited_string to rlhf_preprocessor component
PiperOrigin-RevId: 628282787
1 parent 0c26c04 commit efefe34

File tree

4 files changed

+12
-7
lines changed

4 files changed

+12
-7
lines changed

components/google-cloud/google_cloud_pipeline_components/_implementation/llm/generated/refined_image_versions.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,4 +17,4 @@
1717
DO NOT EDIT - This file is generated, manual changes will be overridden.
1818
"""
1919

20-
IMAGE_TAG = '20240425_1027_RC00'
20+
IMAGE_TAG = '20240425_1734_RC00'

components/google-cloud/google_cloud_pipeline_components/_implementation/llm/reward_model_graph.py

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
from google_cloud_pipeline_components._implementation.llm import preprocess_chat_dataset
2222
from google_cloud_pipeline_components._implementation.llm import private_text_comparison_importer
2323
from google_cloud_pipeline_components._implementation.llm import reward_model_trainer
24+
from google_cloud_pipeline_components._implementation.llm import rlhf_preprocessor
2425
from google_cloud_pipeline_components._implementation.llm import upload_tensorboard_metrics
2526
import kfp
2627

@@ -45,6 +46,7 @@ def pipeline(
4546
accelerator_type: str,
4647
accelerator_count: int,
4748
reward_model_image_uri: str,
49+
comma_separated_candidates_field_names: str,
4850
prompt_sequence_length: int = 512,
4951
target_sequence_length: int = 64,
5052
batch_size: int = 64,
@@ -72,6 +74,7 @@ def pipeline(
7274
accelerator_type: Specific accelerator type for the custom job.
7375
accelerator_count: The number of accelerators.
7476
reward_model_image_uri: Docker image URI to use for the reward model training job.
77+
comma_separated_candidates_field_names: Comma-separated list of fields that contain candidate text, e.g. ``'field_1,field_2,field_3'``.
7578
prompt_sequence_length: Maximum tokenized sequence length for input text. Higher values increase memory overhead. This value should be at most 8192. Default value is 512.
7679
target_sequence_length: Maximum tokenized sequence length for target text. Higher values increase memory overhead. This value should be at most 1024. Default value is 64.
7780
batch_size: Number of examples in each finetuning step. Default is 64.
@@ -91,7 +94,6 @@ def pipeline(
9194
"""
9295
# fmt: on
9396
prompt_column = 'input_text'
94-
candidate_columns = ['candidate_0', 'candidate_1']
9597
choice_column = 'choice'
9698

9799
processed_preference_dataset = (
@@ -103,9 +105,6 @@ def pipeline(
103105
).set_display_name('Preprocess Prompt Dataset')
104106
)
105107

106-
comma_separated_candidates_field_names = (
107-
function_based.convert_to_delimited_string(items=candidate_columns)
108-
)
109108
preference_dataset_importer = (
110109
private_text_comparison_importer.private_text_comparison_importer(
111110
project=project,
@@ -114,7 +113,7 @@ def pipeline(
114113
'processed_dataset_uri'
115114
],
116115
inputs_field_name=prompt_column,
117-
comma_separated_candidates_field_names=comma_separated_candidates_field_names.output,
116+
comma_separated_candidates_field_names=comma_separated_candidates_field_names,
118117
choice_field_name=choice_column,
119118
split=env.TRAIN_SPLIT,
120119
large_model_reference=reward_model_reference,
@@ -131,7 +130,7 @@ def pipeline(
131130
location=location,
132131
input_text=eval_dataset,
133132
inputs_field_name=prompt_column,
134-
comma_separated_candidates_field_names=comma_separated_candidates_field_names.output,
133+
comma_separated_candidates_field_names=comma_separated_candidates_field_names,
135134
choice_field_name=choice_column,
136135
split=env.TRAIN_SPLIT,
137136
large_model_reference=reward_model_reference,

components/google-cloud/google_cloud_pipeline_components/_implementation/llm/rlhf_preprocessor.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
"""Component that preprocesses inputs for Reinforcement Learning from Human Feedback (RLHF)."""
1515

1616
import os
17+
from typing import List
1718

1819
from google_cloud_pipeline_components import _placeholders
1920
from google_cloud_pipeline_components import utils as gcpc_utils
@@ -33,6 +34,7 @@ def rlhf_preprocessor(
3334
gcp_resources: dsl.OutputPath(str), # pytype: disable=invalid-annotation
3435
has_tensorboard_id: dsl.OutputPath(bool), # pytype: disable=invalid-annotation
3536
has_inference_dataset: dsl.OutputPath(bool), # pytype: disable=invalid-annotation
37+
metadata_candidate_columns_string: dsl.OutputPath(str), # pytype: disable=invalid-annotation
3638
metadata_large_model_reference: dsl.OutputPath(str), # pytype: disable=invalid-annotation
3739
metadata_reference_model_path: dsl.OutputPath(str), # pytype: disable=invalid-annotation
3840
metadata_reward_model_reference: dsl.OutputPath(str), # pytype: disable=invalid-annotation
@@ -104,6 +106,7 @@ def rlhf_preprocessor(
104106
f'--use_experimental_image={use_experimental_image}',
105107
f'--has_tensorboard_id_path={has_tensorboard_id}',
106108
f'--has_inference_dataset_path={has_inference_dataset}',
109+
f'--metadata_candidate_columns_string_path={metadata_candidate_columns_string}',
107110
f'--metadata_large_model_reference_path={metadata_large_model_reference}',
108111
f'--metadata_reference_model_path_path={metadata_reference_model_path}',
109112
f'--metadata_reward_model_reference_path={metadata_reward_model_reference}',

components/google-cloud/google_cloud_pipeline_components/preview/llm/rlhf/component.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,9 @@ def rlhf_pipeline(
133133
reward_model_image_uri=preprocess_metadata.outputs[
134134
'metadata_refined_image_uri'
135135
],
136+
comma_separated_candidates_field_names=preprocess_metadata.outputs[
137+
'metadata_candidate_columns_string'
138+
],
136139
prompt_sequence_length=prompt_sequence_length,
137140
target_sequence_length=target_sequence_length,
138141
eval_dataset=validate_pipeline_task.outputs[

0 commit comments

Comments
 (0)