From 794a16debab530bd71e809944c8fb365a42064b6 Mon Sep 17 00:00:00 2001 From: Thoughtseize1 Date: Wed, 18 Sep 2024 16:33:36 +0200 Subject: [PATCH] New Samples for Model Tuning folder --- .../create_evaluation_task_example.py | 92 +++++++++++++++ .../create_evaluation_task_example_test.py | 20 ++++ .../model_tuning/distillation_example.py | 70 ++++++++++++ .../model_tuning/distillation_example_test.py | 107 ++++++++++++++++++ .../model_tuning/evaluate_model_example.py | 65 +++++++++++ .../evaluate_model_example_test.py | 32 ++++++ .../pretrained_codegen_example.py | 50 ++++++++ .../model_tuning/pretrained_examples_test.py | 74 ++++++++++++ .../model_tuning/pretrained_list_example.py | 44 +++++++ .../pretrained_textgen_example.py | 54 +++++++++ .../supervised_advanced_example.py | 63 +++++++++++ .../model_tuning/supervised_cancel_example.py | 40 +++++++ .../model_tuning/supervised_example.py | 57 ++++++++++ .../model_tuning/supervised_get_example.py | 48 ++++++++ .../model_tuning/supervised_list_example.py | 46 ++++++++ .../supervised_tuning_examples_test.py | 45 ++++++++ .../tune_code_generation_model.py | 53 +++++++++ generative_ai/model_tuning/tuning.py | 56 +++++++++ 18 files changed, 1016 insertions(+) create mode 100644 generative_ai/model_tuning/create_evaluation_task_example.py create mode 100644 generative_ai/model_tuning/create_evaluation_task_example_test.py create mode 100644 generative_ai/model_tuning/distillation_example.py create mode 100644 generative_ai/model_tuning/distillation_example_test.py create mode 100644 generative_ai/model_tuning/evaluate_model_example.py create mode 100644 generative_ai/model_tuning/evaluate_model_example_test.py create mode 100644 generative_ai/model_tuning/pretrained_codegen_example.py create mode 100644 generative_ai/model_tuning/pretrained_examples_test.py create mode 100644 generative_ai/model_tuning/pretrained_list_example.py create mode 100644 generative_ai/model_tuning/pretrained_textgen_example.py create mode 
100644 generative_ai/model_tuning/supervised_advanced_example.py create mode 100644 generative_ai/model_tuning/supervised_cancel_example.py create mode 100644 generative_ai/model_tuning/supervised_example.py create mode 100644 generative_ai/model_tuning/supervised_get_example.py create mode 100644 generative_ai/model_tuning/supervised_list_example.py create mode 100644 generative_ai/model_tuning/supervised_tuning_examples_test.py create mode 100644 generative_ai/model_tuning/tune_code_generation_model.py create mode 100644 generative_ai/model_tuning/tuning.py diff --git a/generative_ai/model_tuning/create_evaluation_task_example.py b/generative_ai/model_tuning/create_evaluation_task_example.py new file mode 100644 index 00000000000..2eca9d05067 --- /dev/null +++ b/generative_ai/model_tuning/create_evaluation_task_example.py @@ -0,0 +1,92 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
def create_evaluation_task() -> EvalResult:
    """Evaluate two hand-written summaries with pointwise metrics.

    Builds a two-row DataFrame with ``instruction``, ``context`` and
    ``response`` columns, wraps it in an ``EvalTask`` with four pointwise
    metrics, runs the evaluation with a prompt template, and prints the
    summary metrics followed by the metrics table.

    Returns:
        The result object returned by ``EvalTask.evaluate``.
    """
    # [START generativeaionvertexai_create_evaluation_task]
    import pandas as pd

    import vertexai
    from vertexai.preview.evaluation import EvalTask, MetricPromptTemplateExamples

    # TODO(developer): Update and un-comment below line
    # PROJECT_ID = "your-project-id"
    vertexai.init(project=PROJECT_ID, location="us-central1")

    # One list entry per evaluated example; the three columns line up row by row.
    eval_dataset = pd.DataFrame(
        {
            "instruction": [
                "Summarize the text in one sentence.",
                "Summarize the text such that a five-year-old can understand.",
            ],
            "context": [
                """As part of a comprehensive initiative to tackle urban congestion and foster
                sustainable urban living, a major city has revealed ambitious plans for an
                extensive overhaul of its public transportation system. The project aims not
                only to improve the efficiency and reliability of public transit but also to
                reduce the city\'s carbon footprint and promote eco-friendly commuting options.
                City officials anticipate that this strategic investment will enhance
                accessibility for residents and visitors alike, ushering in a new era of
                efficient, environmentally conscious urban transportation.""",
                """A team of archaeologists has unearthed ancient artifacts shedding light on a
                previously unknown civilization. The findings challenge existing historical
                narratives and provide valuable insights into human history.""",
            ],
            "response": [
                "A major city is revamping its public transportation system to fight congestion, reduce emissions, and make getting around greener and easier.",
                "Some people who dig for old things found some very special tools and objects that tell us about people who lived a long, long time ago! What they found is like a new puzzle piece that helps us understand how people used to live.",
            ],
        }
    )

    eval_task = EvalTask(
        dataset=eval_dataset,
        metrics=[
            MetricPromptTemplateExamples.Pointwise.SUMMARIZATION_QUALITY,
            MetricPromptTemplateExamples.Pointwise.GROUNDEDNESS,
            MetricPromptTemplateExamples.Pointwise.VERBOSITY,
            MetricPromptTemplateExamples.Pointwise.INSTRUCTION_FOLLOWING,
        ],
    )

    # The placeholder names match the dataset column names defined above.
    prompt_template = (
        "Instruction: {instruction}. Article: {context}. Summary: {response}"
    )
    result = eval_task.evaluate(prompt_template=prompt_template)

    print("Summary Metrics:\n")

    for key, value in result.summary_metrics.items():
        print(f"{key}: \t{value}")

    print("\n\nMetrics Table:\n")
    print(result.metrics_table)
    # Example response:
    # Summary Metrics:
    # row_count: 2
    # summarization_quality/mean: 3.5
    # summarization_quality/std: 2.1213203435596424
    # ...

    # [END generativeaionvertexai_create_evaluation_task]
    return result
def distill_model(
    dataset: str,
    source_model: str,
    evaluation_dataset: Optional[str] = None,
) -> object:
    """Distill a new model using a teacher model and a dataset.

    Args:
        dataset (str): GCS URI of the JSONL file containing the training data.
            E.g., "gs://[BUCKET]/[FILENAME].jsonl".
        source_model (str): Name of the teacher model to distill from.
            E.g., "text-unicorn@001".
        evaluation_dataset (Optional[str]): GCS URI of the JSONL file containing the evaluation data.

    Returns:
        The job object returned by ``TextGenerationModel.distill_from``.
        The previous ``-> None`` annotation was wrong: the job has always
        been returned, and the accompanying test reads its state.
    """
    # TODO developer - override these parameters as needed:
    vertexai.init(project=PROJECT_ID, location="us-central1")

    # Create a tuning evaluation specification with the evaluation dataset
    eval_spec = TuningEvaluationSpec(evaluation_data=evaluation_dataset)

    # Load the student model from a pre-trained model
    student_model = TextGenerationModel.from_pretrained("text-bison@002")

    # Start the distillation job using the teacher model and dataset
    distillation_job = student_model.distill_from(
        teacher_model=source_model,
        dataset=dataset,
        # Optional:
        train_steps=300,  # Number of training steps to use when tuning the model.
        evaluation_spec=eval_spec,
    )

    return distillation_job
def get_model_display_name(tuned_model: TextGenerationModel) -> str:
    """Return the ``model_display_name`` pipeline parameter of a tuned model."""
    language_model_tuning_job = tuned_model._job
    pipeline_job = language_model_tuning_job._job
    parameter_values = dict(pipeline_job._gca_resource.runtime_config.parameter_values)
    return parameter_values["model_display_name"]


def upload_to_gcs(bucket: str, name: str, data: str) -> None:
    """Write ``data`` to the object ``name`` in the bucket called ``bucket``."""
    client = storage.Client()
    # Keep the bucket *name* parameter distinct from the bucket *object*.
    gcs_bucket = client.get_bucket(bucket)
    gcs_bucket.blob(name).upload_from_string(data)


def download_from_gcs(bucket: str, name: str) -> str:
    """Return the first ten lines of the object ``name`` in ``bucket``."""
    client = storage.Client()
    gcs_bucket = client.get_bucket(bucket)
    data = gcs_bucket.blob(name).download_as_bytes()
    return "\n".join(data.decode().splitlines()[:10])


def delete_from_gcs(bucket: str, name: str) -> None:
    """Delete the object ``name`` from the bucket called ``bucket``."""
    client = storage.Client()
    gcs_bucket = client.get_bucket(bucket)
    gcs_bucket.blob(name).delete()


@pytest.fixture(scope="function")
def training_data_filename() -> str:
    """Yield the gs:// URI of a temporary copy of the sample training data.

    The first ten lines of the public sample dataset are uploaded to
    ``_BUCKET`` under a random name; the object is deleted after the test.
    """
    temp_filename = f"{uuid.uuid4()}.jsonl"
    data = download_from_gcs(
        "cloud-samples-data", "ai-platform/generative_ai/headline_classification.jsonl"
    )
    upload_to_gcs(_BUCKET, temp_filename, data)
    try:
        yield f"gs://{_BUCKET}/{temp_filename}"
    finally:
        delete_from_gcs(_BUCKET, temp_filename)


def teardown_model(
    tuned_model: TextGenerationModel, training_data_filename: str
) -> None:
    """Delete tuned models (and their endpoints) created for this test run.

    Only models whose version-1 display name contains
    ``training_data_filename`` are removed.
    """
    for tuned_model_name in tuned_model.list_tuned_model_names():
        model_registry = aiplatform.models.ModelRegistry(model=tuned_model_name)
        # Look the display name up once instead of once per use.
        display_name = model_registry.get_version_info("1").model_display_name
        if training_data_filename not in display_name:
            continue
        for endpoint in aiplatform.Endpoint.list():
            # Tear the endpoint down once if it has any deployed model; the
            # old per-model loop repeated undeploy/delete on a deleted endpoint.
            if endpoint.display_name == display_name and endpoint.list_models():
                endpoint.undeploy_all()
                endpoint.delete()
        aiplatform.Model(model_registry.model_resource_name).delete()


@pytest.mark.skip("Blocked on b/277959219")
def test_distill_model(training_data_filename: str) -> None:
    """Takes approx. 60 minutes."""
    # distill_model() names its teacher parameter ``source_model``; passing
    # ``teacher_model=`` raised TypeError before the job was even started.
    student_model = distillation_example.distill_model(
        dataset=training_data_filename,
        source_model="text-unicorn@001",
        evaluation_dataset=training_data_filename,
    )
    try:
        assert (
            student_model._job.state
            == pipeline_state.PipelineState.PIPELINE_STATE_SUCCEEDED
        )
    finally:
        teardown_model(student_model, training_data_filename)
def evaluate_model() -> object:
    """Run a text-classification evaluation on a pretrained text model.

    Returns:
        The metrics object produced by ``TextGenerationModel.evaluate``.
    """

    # The pipeline components used by the evaluation task need explicit
    # cloud-platform scoped credentials.
    cloud_platform_scopes = ["https://www.googleapis.com/auth/cloud-platform"]
    credentials, _ = default(scopes=cloud_platform_scopes)

    vertexai.init(project=PROJECT_ID, location="us-central1", credentials=credentials)

    # Reference the generative AI model under evaluation
    text_model = TextGenerationModel.from_pretrained("text-bison@002")

    # Text classification task: labelled prompts, the known classes and the
    # column that carries the expected label
    classification_spec = EvaluationTextClassificationSpec(
        ground_truth_data=[
            "gs://cloud-samples-data/ai-platform/generative_ai/llm_classification_bp_input_prompts_with_ground_truth.jsonl"
        ],
        class_names=["nature", "news", "sports", "health", "startups"],
        target_column_name="ground_truth",
    )

    # Run the evaluation and show what came back
    metrics = text_model.evaluate(task_spec=classification_spec)
    print(metrics)
    # Example response:
    # ...
    # PipelineJob run completed.
    # Resource name: projects/123456789/locations/us-central1/pipelineJobs/evaluation-llm-classification-...
    # EvaluationClassificationMetric(label_name=None, auPrc=0.53833705, auRoc=0.8...

    return metrics
def tune_code_generation_model() -> CodeGenerationModel:
    """Start a tuning job for ``code-bison@002`` and print its status.

    Returns:
        The ``CodeGenerationModel`` the tuning job was started from.
    """
    # Initialize Vertex AI
    vertexai.init(project=PROJECT_ID, location="us-central1")

    code_model = CodeGenerationModel.from_pretrained("code-bison@002")

    # TODO(developer): Update the training data path
    training_data_uri = "gs://cloud-samples-data/ai-platform/generative_ai/headline_classification.jsonl"
    job = code_model.tune_model(
        training_data=training_data_uri,
        tuning_job_location="europe-west4",
        tuned_model_location="us-central1",
    )

    print(job._status)
    # Example response:
    # ...
    # pipeline_job = aiplatform.PipelineJob.get('projects/1234567890/locations/europe-west4/pipelineJobs/tune...
    # PipelineState.PIPELINE_STATE_PENDING
    return code_model
def teardown_model(tuned_model: TextGenerationModel) -> None:
    """Delete every tuned model of ``tuned_model`` and its matching endpoints."""
    for tuned_model_name in tuned_model.list_tuned_model_names():
        model_registry = aiplatform.models.ModelRegistry(model=tuned_model_name)

        display_name = model_registry.get_version_info("1").model_display_name
        for endpoint in aiplatform.Endpoint.list():
            # Tear the endpoint down once if it has any deployed model; the
            # old per-model loop repeated undeploy/delete on a deleted endpoint.
            if endpoint.display_name == display_name and endpoint.list_models():
                endpoint.undeploy_all()
                endpoint.delete()
        aiplatform.Model(model_registry.model_resource_name).delete()


@pytest.mark.skip("Blocked on b/277959219")
def test_tuning_code_generation_model() -> None:
    """Takes approx. 20 minutes."""
    tuned_model = pretrained_codegen_example.tune_code_generation_model()
    try:
        assert tuned_model
    finally:
        teardown_model(tuned_model)


@pytest.mark.skip("Blocked on b/277959219")
def test_tuning() -> None:
    """Takes approx. 20 minutes."""
    tuned_model = pretrained_textgen_example.tuning()
    try:
        assert tuned_model
    finally:
        teardown_model(tuned_model)


@backoff.on_exception(backoff.expo, ResourceExhausted, max_time=10)
def test_list_tuned_models() -> None:
    """Check that no listed model carries the test-fixture display name."""
    fixture_marker = (
        "Vertex LLM Test Fixture "
        "(list_tuned_models_test.py::test_list_tuned_models)"
    )
    tuned_model_names = pretrained_list_example.list_tuned_models()
    filtered_models_counter = 0
    for tuned_model_name in tuned_model_names:
        model_registry = aiplatform.models.ModelRegistry(model=tuned_model_name)
        if fixture_marker in model_registry.get_version_info("1").model_display_name:
            filtered_models_counter += 1
    assert filtered_models_counter == 0
def list_tuned_models() -> list:
    """List tuned models.

    Returns:
        The tuned model resource names reported by
        ``TextGenerationModel.list_tuned_model_names`` for ``text-bison@002``.
        The previous ``-> None`` annotation was wrong: the names have always
        been returned, and the accompanying test iterates over them.
    """
    # [START generativeaionvertexai_sdk_list_tuned_models]
    import vertexai

    from vertexai.language_models import TextGenerationModel

    # TODO(developer): Update and un-comment below line
    # PROJECT_ID = "your-project-id"

    vertexai.init(project=PROJECT_ID, location="us-central1")

    model = TextGenerationModel.from_pretrained("text-bison@002")
    tuned_model_names = model.list_tuned_model_names()
    print(tuned_model_names)
    # Example response:
    # ['projects/1234567890/locations/us-central1/models/1234567889012345',
    # ...]

    # [END generativeaionvertexai_sdk_list_tuned_models]

    return tuned_model_names
def tuning() -> TextGenerationModel:
    """Start a tuning job for ``text-bison@002`` and print its status.

    Returns:
        The ``TextGenerationModel`` the tuning job was started from.
    """
    # TextGenerationModel is already imported at module level, so only
    # vertexai needs a local import here.
    import vertexai

    # Initialize Vertex AI
    vertexai.init(project=PROJECT_ID, location="us-central1")

    model = TextGenerationModel.from_pretrained("text-bison@002")

    # TODO(developer): Update the training data path
    tuning_job = model.tune_model(
        training_data="gs://cloud-samples-data/ai-platform/generative_ai/headline_classification.jsonl",
        tuning_job_location="europe-west4",
        tuned_model_location="us-central1",
    )

    print(tuning_job._status)
    # Example response:
    # pipeline_job = aiplatform.PipelineJob.get('projects/1234567890/locations/europe-west4/pipelineJobs/tune...
    # View Pipeline Job:
    # ...
    # PipelineState.PIPELINE_STATE_PENDING

    return model
def gemini_tuning_advanced() -> sft.SupervisedTuningJob:
    """Run a supervised tuning job with optional parameters and wait for it.

    Returns:
        The tuning job, after ``has_ended`` has become true.
    """
    # [START generativeaionvertexai_tuning_advanced]

    import time

    import vertexai
    from vertexai.preview.tuning import sft

    # TODO(developer): Update and un-comment below line
    # PROJECT_ID = "your-project-id"
    vertexai.init(project=PROJECT_ID, location="us-central1")

    train_uri = "gs://cloud-samples-data/ai-platform/generative_ai/sft_train_data.jsonl"
    validation_uri = "gs://cloud-samples-data/ai-platform/generative_ai/sft_validation_data.jsonl"

    tuning_job = sft.train(
        source_model="gemini-1.0-pro-002",
        train_dataset=train_uri,
        # The following parameters are optional
        validation_dataset=validation_uri,
        epochs=4,
        adapter_size=4,
        learning_rate_multiplier=1.0,
        tuned_model_display_name="tuned_gemini_pro",
    )

    # Check once a minute until the job reports that it has ended
    while True:
        if tuning_job.has_ended:
            break
        time.sleep(60)
        tuning_job.refresh()

    print(tuning_job.tuned_model_name)
    print(tuning_job.tuned_model_endpoint_name)
    print(tuning_job.experiment)
    # Example response:
    # projects/123456789012/locations/us-central1/models/1234567890@1
    # projects/123456789012/locations/us-central1/endpoints/123456789012345
    #

    # [END generativeaionvertexai_tuning_advanced]
    return tuning_job
def cancel_tuning_job() -> None:
    """Cancel the supervised tuning job with the hard-coded sample job ID."""
    # [START generativeaionvertexai_cancel_tuning_job]
    import vertexai
    from vertexai.preview.tuning import sft

    # TODO(developer): Update and un-comment below lines
    # PROJECT_ID = "your-project-id"
    # LOCATION = "us-central1"
    vertexai.init(project=PROJECT_ID, location=LOCATION)

    tuning_job_id = "4982013113894174720"
    # Full resource name of the job to cancel
    job_resource_name = (
        f"projects/{PROJECT_ID}/locations/{LOCATION}/tuningJobs/{tuning_job_id}"
    )
    sft.SupervisedTuningJob(job_resource_name).cancel()
    # [END generativeaionvertexai_cancel_tuning_job]
def gemini_tuning_basic() -> sft.SupervisedTuningJob:
    """Run a supervised tuning job with default settings and wait for it.

    Returns:
        The tuning job, after ``has_ended`` has become true.
    """
    # [START generativeaionvertexai_tuning_basic]

    import time

    import vertexai
    from vertexai.preview.tuning import sft

    # TODO(developer): Update and un-comment below line
    # PROJECT_ID = "your-project-id"
    vertexai.init(project=PROJECT_ID, location="us-central1")

    train_uri = "gs://cloud-samples-data/ai-platform/generative_ai/sft_train_data.jsonl"
    tuning_job = sft.train(
        source_model="gemini-1.0-pro-002",
        train_dataset=train_uri,
    )

    # Check once a minute until the job reports that it has ended
    while True:
        if tuning_job.has_ended:
            break
        time.sleep(60)
        tuning_job.refresh()

    print(tuning_job.tuned_model_name)
    print(tuning_job.tuned_model_endpoint_name)
    print(tuning_job.experiment)
    # Example response:
    # projects/123456789012/locations/us-central1/models/1234567890@1
    # projects/123456789012/locations/us-central1/endpoints/123456789012345
    #

    # [END generativeaionvertexai_tuning_basic]
    return tuning_job
import os

from vertexai.preview.tuning import sft

PROJECT_ID = os.getenv("GOOGLE_CLOUD_PROJECT")
LOCATION = "us-central1"


def get_tuning_job() -> sft.SupervisedTuningJob:
    """Look up a supervised tuning job by its resource name and print it.

    Returns:
        The ``SupervisedTuningJob`` built from the module-level
        ``PROJECT_ID`` and ``LOCATION`` plus the sample job ID.
    """
    # [START generativeaionvertexai_get_tuning_job]
    import vertexai
    from vertexai.preview.tuning import sft

    # TODO(developer): Update and un-comment below lines
    # PROJECT_ID = "your-project-id"
    # LOCATION = "us-central1"
    vertexai.init(project=PROJECT_ID, location=LOCATION)

    tuning_job_id = "4982013113894174720"
    # Resource name of the tuning job to retrieve.
    resource_name = (
        f"projects/{PROJECT_ID}/locations/{LOCATION}/tuningJobs/{tuning_job_id}"
    )
    tuning_job = sft.SupervisedTuningJob(resource_name)

    print(tuning_job)
    # Example response:
    # (printed form of the SupervisedTuningJob object)
    # resource name: projects/1234567890/locations/us-central1/tuningJobs/4982013113894174720

    # [END generativeaionvertexai_get_tuning_job]
    return tuning_job


if __name__ == "__main__":
    get_tuning_job()
import os

from typing import List

from vertexai.preview.tuning import sft

PROJECT_ID = os.getenv("GOOGLE_CLOUD_PROJECT")


def list_tuning_jobs() -> List[sft.SupervisedTuningJob]:
    """List the supervised tuning jobs and print each one.

    Returns:
        Whatever ``sft.SupervisedTuningJob.list()`` returned, unchanged.
    """
    # [START generativeaionvertexai_list_tuning_jobs]
    import vertexai
    from vertexai.preview.tuning import sft

    # TODO(developer): Update and un-comment below line
    # PROJECT_ID = "your-project-id"
    vertexai.init(project=PROJECT_ID, location="us-central1")

    tuning_jobs = sft.SupervisedTuningJob.list()

    for tuning_job in tuning_jobs:
        print(tuning_job)
    # Example response:
    # (printed form of each SupervisedTuningJob object)
    # resource name: projects/12345678/locations/us-central1/tuningJobs/123456789012345

    # [END generativeaionvertexai_list_tuning_jobs]
    return tuning_jobs


if __name__ == "__main__":
    list_tuning_jobs()
import pytest

import supervised_advanced_example
import supervised_cancel_example
import supervised_example
import supervised_get_example
import supervised_list_example


@pytest.mark.skip(reason="Skip due to tuning taking a long time.")
def test_gemini_tuning() -> None:
    """Basic and advanced tuning samples each return a truthy job."""
    basic_job = supervised_example.gemini_tuning_basic()
    assert basic_job

    advanced_job = supervised_advanced_example.gemini_tuning_advanced()
    assert advanced_job


def test_get_tuning_job() -> None:
    """The get sample returns a truthy tuning job object."""
    assert supervised_get_example.get_tuning_job()


def test_list_tuning_jobs() -> None:
    """The list sample returns a truthy (non-empty) result."""
    assert supervised_list_example.list_tuning_jobs()


@pytest.mark.skip(reason="Skip due to tuning taking a long time.")
def test_cancel_tuning_job() -> None:
    """The cancel sample runs to completion; it returns nothing to check."""
    supervised_cancel_example.cancel_tuning_job()
from __future__ import annotations

import os

# Imported at module level so the return annotation below resolves for type
# checkers (same arrangement as the TextGenerationModel sample in tuning.py).
from vertexai.language_models import CodeGenerationModel

PROJECT_ID = os.getenv("GOOGLE_CLOUD_PROJECT")


def tune_code_generation_model() -> CodeGenerationModel:
    """Start a tuning job for the code-bison@002 code generation model.

    Initializes the Vertex AI SDK with the module-level ``PROJECT_ID``,
    loads the pretrained model, submits a tuning job, and prints the job's
    status.

    Returns:
        The pretrained ``CodeGenerationModel`` the tuning job was started
        from. The signature previously declared ``-> None`` even though the
        function returns this model.
    """
    # [START generativeaionvertexai_tune_code_generation_model]
    import vertexai
    from vertexai.language_models import CodeGenerationModel

    # TODO(developer): Update and un-comment below line
    # PROJECT_ID = "your-project-id"

    # Initialize Vertex AI
    vertexai.init(project=PROJECT_ID, location="us-central1")

    model = CodeGenerationModel.from_pretrained("code-bison@002")

    # TODO(developer): Update the training data path
    tuning_job = model.tune_model(
        training_data="gs://cloud-samples-data/ai-platform/generative_ai/headline_classification.jsonl",
        tuning_job_location="europe-west4",
        tuned_model_location="us-central1",
    )

    print(tuning_job._status)
    # Example response:
    # ...
    # pipeline_job = aiplatform.PipelineJob.get('projects/1234567890/locations/europe-west4/pipelineJobs/tune...
    # PipelineState.PIPELINE_STATE_PENDING

    # [END generativeaionvertexai_tune_code_generation_model]
    return model


if __name__ == "__main__":
    tune_code_generation_model()
from __future__ import annotations

import os

from vertexai.language_models import TextGenerationModel

PROJECT_ID = os.getenv("GOOGLE_CLOUD_PROJECT")


def tuning() -> TextGenerationModel:
    """Start a tuning job for the text-bison@002 text generation model.

    Initializes the Vertex AI SDK with the module-level ``PROJECT_ID``,
    loads the pretrained model, submits a tuning job, and prints the job's
    status.

    Returns:
        The pretrained ``TextGenerationModel`` the tuning job was started
        from.
    """
    # [START generativeaionvertexai_tuning]
    import vertexai
    from vertexai.language_models import TextGenerationModel

    # TODO(developer): Update and un-comment below line
    # PROJECT_ID = "your-project-id"

    # Initialize Vertex AI
    vertexai.init(project=PROJECT_ID, location="us-central1")

    base_model = TextGenerationModel.from_pretrained("text-bison@002")

    # TODO(developer): Update the training data path
    job = base_model.tune_model(
        training_data="gs://cloud-samples-data/ai-platform/generative_ai/headline_classification.jsonl",
        tuning_job_location="europe-west4",
        tuned_model_location="us-central1",
    )

    print(job._status)
    # Example response:
    # pipeline_job = aiplatform.PipelineJob.get('projects/1234567890/locations/europe-west4/pipelineJobs/tune...
    # View Pipeline Job:
    # ...
    # PipelineState.PIPELINE_STATE_PENDING

    # [END generativeaionvertexai_tuning]
    return base_model


if __name__ == "__main__":
    tuning()