feat: gemma2 samples with accelerated TPU and GPU

BigBlackWolf · BigBlackWolf · commit 570c1a929f75 · 2024-09-19T14:34:50.000+02:00
diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
@@ -60,6 +60,7 @@
 /auth/                                 @GoogleCloudPlatform/go-samples-reviewers @GoogleCloudPlatform/cloud-samples-reviewers @GoogleCloudPlatform/dee-infra @GoogleCloudPlatform/googleapis-auth
 /batch/                                @GoogleCloudPlatform/go-samples-reviewers @GoogleCloudPlatform/cloud-samples-reviewers @GoogleCloudPlatform/dee-infra
 /compute/                              @GoogleCloudPlatform/go-samples-reviewers @GoogleCloudPlatform/cloud-samples-reviewers @GoogleCloudPlatform/dee-infra
+/gemma2/                               @GoogleCloudPlatform/go-samples-reviewers @GoogleCloudPlatform/cloud-samples-reviewers @GoogleCloudPlatform/dee-infra
 /iam/                                  @GoogleCloudPlatform/go-samples-reviewers @GoogleCloudPlatform/cloud-samples-reviewers @GoogleCloudPlatform/dee-infra
 /iap/                                  @GoogleCloudPlatform/go-samples-reviewers @GoogleCloudPlatform/cloud-samples-reviewers @GoogleCloudPlatform/dee-infra
 /kms/                                  @GoogleCloudPlatform/go-samples-reviewers @GoogleCloudPlatform/cloud-samples-reviewers @GoogleCloudPlatform/dee-infra
diff --git a/gemma2/gemma2_predict_gpu.go b/gemma2/gemma2_predict_gpu.go
@@ -0,0 +1,83 @@
+// Copyright 2024 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package snippets
+
+// [START generativeaionvertexai_gemma2_predict_gpu]
+import (
+	"context"
+	"fmt"
+	"io"
+
+	"cloud.google.com/go/aiplatform/apiv1/aiplatformpb"
+	"github.com/googleapis/gax-go"
+
+	"google.golang.org/protobuf/types/known/structpb"
+)
+
+// ClientInterface is the set of prediction-client methods that predictGPU
+// accepts. Taking an interface instead of a concrete client lets callers pass
+// either a client built with aiplatform.NewPredictionClient (see the note
+// inside predictGPU) or a test double.
+type ClientInterface interface {
+	// Close is part of the required method set; predictGPU itself does not
+	// call it, so closing the client stays the caller's responsibility.
+	Close() error
+	// Predict sends req and returns the response or an error.
+	Predict(ctx context.Context, req *aiplatformpb.PredictRequest, opts ...gax.CallOption) (*aiplatformpb.PredictResponse, error)
+}
+
+// predictGPU demonstrates how to run inference on a Gemma2 model deployed to a
+// Vertex AI endpoint with GPU accelerators.
+//
+// It sends a fixed prompt to the endpoint
+// projects/{projectID}/locations/{location}/endpoints/{endpointID} through
+// client and writes the first returned prediction, as a string, to w.
+//
+// An error is returned when the prompt cannot be converted to a protobuf
+// Value, when the Predict call fails, or when the response carries no
+// predictions.
+func predictGPU(w io.Writer, client ClientInterface, projectID, location, endpointID string) error {
+	ctx := context.Background()
+
+	// Note: client can be initialised in the following way:
+	// apiEndpoint := fmt.Sprintf("%s-aiplatform.googleapis.com:443", location)
+	// client, err := aiplatform.NewPredictionClient(ctx, option.WithEndpoint(apiEndpoint))
+	// if err != nil {
+	// 	return fmt.Errorf("unable to create prediction client: %v", err)
+	// }
+	// defer client.Close()
+
+	gemma2Endpoint := fmt.Sprintf("projects/%s/locations/%s/endpoints/%s", projectID, location, endpointID)
+	prompt := "Why is the sky blue?"
+	parameters := map[string]interface{}{
+		"temperature":     0.9,
+		"maxOutputTokens": 1024,
+		"topP":            1.0,
+		"topK":            1,
+	}
+
+	// Encapsulate the prompt in a correct format for GPUs.
+	// Pay attention that prompt should be set in "inputs" field.
+	promptValue, err := structpb.NewValue(map[string]interface{}{
+		"inputs":     prompt,
+		"parameters": parameters,
+	})
+	if err != nil {
+		// Report the failure once, through the returned error, rather than
+		// also writing it to the sample's output stream.
+		return fmt.Errorf("unable to convert prompt to Value: %w", err)
+	}
+
+	req := &aiplatformpb.PredictRequest{
+		Endpoint:  gemma2Endpoint,
+		Instances: []*structpb.Value{promptValue},
+	}
+
+	resp, err := client.Predict(ctx, req)
+	if err != nil {
+		return fmt.Errorf("unable to get prediction from %s: %w", gemma2Endpoint, err)
+	}
+
+	// Guard the index below: an empty prediction list would otherwise panic.
+	predictions := resp.GetPredictions()
+	if len(predictions) == 0 {
+		return fmt.Errorf("endpoint %s returned no predictions", gemma2Endpoint)
+	}
+	fmt.Fprintf(w, "%v", predictions[0].GetStringValue())
+
+	return nil
+}
+
+// [END generativeaionvertexai_gemma2_predict_gpu]
diff --git a/gemma2/gemma2_predict_tpu.go b/gemma2/gemma2_predict_tpu.go
@@ -0,0 +1,82 @@
+// Copyright 2024 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package snippets
+
+// [START generativeaionvertexai_gemma2_predict_tpu]
+import (
+	"context"
+	"fmt"
+	"io"
+
+	"cloud.google.com/go/aiplatform/apiv1/aiplatformpb"
+	"github.com/googleapis/gax-go"
+
+	"google.golang.org/protobuf/types/known/structpb"
+)
+
+// PredictClientInterface is the set of prediction-client methods that
+// predictTPU accepts. It declares the same two methods as ClientInterface in
+// gemma2_predict_gpu.go, so a value usable with one sample is usable with the
+// other.
+type PredictClientInterface interface {
+	// Close is part of the required method set; predictTPU itself does not
+	// call it, so closing the client stays the caller's responsibility.
+	Close() error
+	// Predict sends req and returns the response or an error.
+	Predict(ctx context.Context, req *aiplatformpb.PredictRequest, opts ...gax.CallOption) (*aiplatformpb.PredictResponse, error)
+}
+
+// predictTPU demonstrates how to run inference on a Gemma2 model deployed to a
+// Vertex AI endpoint with TPU accelerators.
+//
+// It sends a fixed prompt to the endpoint
+// projects/{projectID}/locations/{location}/endpoints/{endpointID} through
+// client and writes the first returned prediction, as a string, to w.
+//
+// An error is returned when the prompt cannot be converted to a protobuf
+// Value, when the Predict call fails, or when the response carries no
+// predictions.
+func predictTPU(w io.Writer, client PredictClientInterface, projectID, location, endpointID string) error {
+	ctx := context.Background()
+
+	// Note: client can be initialised in the following way:
+	// apiEndpoint := fmt.Sprintf("%s-aiplatform.googleapis.com:443", location)
+	// client, err := aiplatform.NewPredictionClient(ctx, option.WithEndpoint(apiEndpoint))
+	// if err != nil {
+	// 	return fmt.Errorf("unable to create prediction client: %v", err)
+	// }
+	// defer client.Close()
+
+	gemma2Endpoint := fmt.Sprintf("projects/%s/locations/%s/endpoints/%s", projectID, location, endpointID)
+	prompt := "Why is the sky blue?"
+	parameters := map[string]interface{}{
+		"temperature":     0.9,
+		"maxOutputTokens": 1024,
+		"topP":            1.0,
+		"topK":            1,
+	}
+
+	// Encapsulate the prompt in a correct format for TPUs.
+	// Here the prompt is set in the "prompt" field.
+	promptValue, err := structpb.NewValue(map[string]interface{}{
+		"prompt":     prompt,
+		"parameters": parameters,
+	})
+	if err != nil {
+		// Report the failure once, through the returned error, rather than
+		// also writing it to the sample's output stream.
+		return fmt.Errorf("unable to convert prompt to Value: %w", err)
+	}
+
+	req := &aiplatformpb.PredictRequest{
+		Endpoint:  gemma2Endpoint,
+		Instances: []*structpb.Value{promptValue},
+	}
+
+	resp, err := client.Predict(ctx, req)
+	if err != nil {
+		return fmt.Errorf("unable to get prediction from %s: %w", gemma2Endpoint, err)
+	}
+
+	// Guard the index below: an empty prediction list would otherwise panic.
+	predictions := resp.GetPredictions()
+	if len(predictions) == 0 {
+		return fmt.Errorf("endpoint %s returned no predictions", gemma2Endpoint)
+	}
+	fmt.Fprintf(w, "%v", predictions[0].GetStringValue())
+
+	return nil
+}
+
+// [END generativeaionvertexai_gemma2_predict_tpu]
diff --git a/gemma2/gemma2_test.go b/gemma2/gemma2_test.go
@@ -0,0 +1,59 @@
+// Copyright 2024 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package snippets
+
+import (
+	"bytes"
+	"strings"
+	"testing"
+
+	"github.com/GoogleCloudPlatform/golang-samples/internal/testutil"
+)
+
+// TestPredictGPU runs the predictGPU and predictTPU samples with a
+// PredictionsClient value and checks that the expected generated text is
+// written to the output buffer.
+func TestPredictGPU(t *testing.T) {
+	tc := testutil.SystemTest(t)
+	client := PredictionsClient{}
+
+	const (
+		// Mock ID passed to both samples as the endpoint ID.
+		endpointID = "123456789"
+		want       = "Rayleigh scattering"
+	)
+
+	tests := []struct {
+		name    string
+		predict func(buf *bytes.Buffer) error
+	}{
+		{
+			name: "GPU predict",
+			predict: func(buf *bytes.Buffer) error {
+				return predictGPU(buf, client, tc.ProjectID, "us-east4", endpointID)
+			},
+		},
+		{
+			name: "TPU predict",
+			predict: func(buf *bytes.Buffer) error {
+				return predictTPU(buf, client, tc.ProjectID, "us-west1", endpointID)
+			},
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			// A fresh buffer per subtest keeps output from leaking between cases.
+			var buf bytes.Buffer
+			if err := tt.predict(&buf); err != nil {
+				t.Fatal(err)
+			}
+
+			// Include the actual output so a failure is diagnosable from the log.
+			if got := buf.String(); !strings.Contains(got, want) {
+				t.Errorf("output = %q, want it to contain %q", got, want)
+			}
+		})
+	}
+}
diff --git a/gemma2/go.mod b/gemma2/go.mod
@@ -0,0 +1,53 @@
+module github.com/GoogleCloudPlatform/golang-samples/gemma2
+
+go 1.21
+
+require (
+	cloud.google.com/go/aiplatform v1.68.0
+	github.com/GoogleCloudPlatform/golang-samples v0.0.0-20240918200157-a00ca430a14b
+	github.com/googleapis/gax-go v1.0.3
+	github.com/googleapis/gax-go/v2 v2.13.0
+	google.golang.org/protobuf v1.34.2
+)
+
+require (
+	cloud.google.com/go v0.115.1 // indirect
+	cloud.google.com/go/auth v0.9.3 // indirect
+	cloud.google.com/go/auth/oauth2adapt v0.2.4 // indirect
+	cloud.google.com/go/compute/metadata v0.5.0 // indirect
+	cloud.google.com/go/iam v1.2.0 // indirect
+	cloud.google.com/go/longrunning v0.6.0 // indirect
+	cloud.google.com/go/storage v1.43.0 // indirect
+	github.com/BurntSushi/toml v0.3.1 // indirect
+	github.com/felixge/httpsnoop v1.0.4 // indirect
+	github.com/go-logr/logr v1.4.2 // indirect
+	github.com/go-logr/stdr v1.2.2 // indirect
+	github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect
+	github.com/golang/protobuf v1.5.4 // indirect
+	github.com/google/s2a-go v0.1.8 // indirect
+	github.com/google/uuid v1.6.0 // indirect
+	github.com/googleapis/enterprise-certificate-proxy v0.3.4 // indirect
+	go.opencensus.io v0.24.0 // indirect
+	go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.54.0 // indirect
+	go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.54.0 // indirect
+	go.opentelemetry.io/otel v1.29.0 // indirect
+	go.opentelemetry.io/otel/metric v1.29.0 // indirect
+	go.opentelemetry.io/otel/trace v1.29.0 // indirect
+	golang.org/x/crypto v0.27.0 // indirect
+	golang.org/x/exp v0.0.0-20190221220918-438050ddec5e // indirect
+	golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3 // indirect
+	golang.org/x/mod v0.17.0 // indirect
+	golang.org/x/net v0.29.0 // indirect
+	golang.org/x/oauth2 v0.23.0 // indirect
+	golang.org/x/sync v0.8.0 // indirect
+	golang.org/x/sys v0.25.0 // indirect
+	golang.org/x/text v0.18.0 // indirect
+	golang.org/x/time v0.6.0 // indirect
+	golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d // indirect
+	google.golang.org/api v0.197.0 // indirect
+	google.golang.org/genproto v0.0.0-20240903143218-8af14fe29dc1 // indirect
+	google.golang.org/genproto/googleapis/api v0.0.0-20240827150818-7e3bb234dfed // indirect
+	google.golang.org/genproto/googleapis/rpc v0.0.0-20240903143218-8af14fe29dc1 // indirect
+	google.golang.org/grpc v1.66.1 // indirect
+	honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc // indirect
+)
diff --git a/gemma2/go.sum b/gemma2/go.sum
diff --git a/gemma2/mock_client.go b/gemma2/mock_client.go