Skip to content

Commit 3247f92

Browse files
Thoughtseize1 authored and riathakkar committed
refactor: (GenAI) Reorganized Embeddings Samples (Group A) (GoogleCloudPlatform#12575)
* New reorganized files
* Files to delete
* Update region tags
* Fix lint
* Changed "Example output" -> "Example response"
1 parent 6d33c96 commit 3247f92

24 files changed

+619
-8
lines changed

generative_ai/embedding.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
14+
# TODO: Delete this file after approving /embeddings/document_retrieval_example.py
1415

1516
# [START generativeaionvertexai_sdk_embedding]
1617
from typing import List, Optional

generative_ai/embedding_batch.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15+
# TODO: Delete this file after approving /embeddings/batch_example.py
1516
import os
1617

1718
from google.cloud.aiplatform import BatchPredictionJob

generative_ai/embedding_batch_test.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
14-
14+
# TODO: Delete this file after approving /embeddings/batch_example_test.py
1515
import os
1616

1717
import backoff

generative_ai/embedding_model_tuning.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
14-
14+
# TODO: Delete this file after approving /embeddings/model_tuning_example.py
1515
# [START generativeaionvertexai_sdk_embedding]
1616
import re
1717

generative_ai/embedding_model_tuning_test.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
14-
14+
# TODO: Delete this file after approving /embeddings/model_tuning_test.py
1515
import os
1616

1717
import backoff

generative_ai/embedding_preview.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
14-
14+
# TODO: Delete this file after approving /embeddings/code_retrieval_example.py
1515
from __future__ import annotations
1616

1717
# [START generativeaionvertexai_sdk_embedding]

generative_ai/embedding_test.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
14-
14+
# TODO: Delete this file after approving /embeddings/retrievals_test.py
1515
import backoff
1616

1717
from google.api_core.exceptions import ResourceExhausted
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
# Copyright 2024 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# https://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
import os
15+
16+
from google.cloud.aiplatform import BatchPredictionJob
17+
18+
# Project passed to vertexai.init(); None when GOOGLE_CLOUD_PROJECT is unset.
PROJECT_ID = os.environ.get("GOOGLE_CLOUD_PROJECT")
# Output location for the batch job. Read once, when this module is imported,
# so later changes to GCS_OUTPUT_URI are not picked up. None when unset.
OUTPUT_URI = os.environ.get("GCS_OUTPUT_URI")
20+
21+
22+
def embed_text_batch() -> BatchPredictionJob:
    """Run a batch text-embedding job on Vertex AI and return the job handle.

    The input is a sample JSONL file in Cloud Storage; the output location is
    taken from the module-level ``OUTPUT_URI``.

    Read more: https://cloud.google.com/vertex-ai/generative-ai/docs/embeddings/batch-prediction-genai-embeddings

    Returns:
        The job object returned by ``TextEmbeddingModel.batch_predict``.
    """
    # [START generativeaionvertexai_embedding_batch]
    import vertexai
    from vertexai.preview import language_models

    # TODO(developer): Uncomment and set your project ID
    # PROJECT_ID = "your-project-id"
    vertexai.init(project=PROJECT_ID, location="us-central1")

    source_uri = (
        "gs://cloud-samples-data/generative-ai/embeddings/embeddings_input.jsonl"
    )
    # Format: `gs://your-bucket-unique-name/directory/` or `bq://project_name.llm_dataset`
    destination_uri = OUTPUT_URI

    model = language_models.TextEmbeddingModel.from_pretrained("textembedding-gecko")

    job = model.batch_predict(
        dataset=[source_uri],
        destination_uri_prefix=destination_uri,
    )
    print(job.display_name)
    print(job.resource_name)
    print(job.state)
    # Example response:
    # BatchPredictionJob 2024-09-10 15:47:51.336391
    # projects/1234567890/locations/us-central1/batchPredictionJobs/123456789012345
    # JobState.JOB_STATE_SUCCEEDED

    # [END generativeaionvertexai_embedding_batch]

    return job
59+
60+
61+
if __name__ == "__main__":
62+
embed_text_batch()
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
# Copyright 2024 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# https://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
import os
15+
16+
import backoff
17+
18+
import batch_example
19+
20+
from google.api_core.exceptions import ResourceExhausted
21+
22+
import pytest
23+
24+
25+
@backoff.on_exception(backoff.expo, ResourceExhausted, max_time=10)
def test_embed_text_batch(monkeypatch: pytest.MonkeyPatch) -> None:
    """Run the batch embedding sample and check that it returns a job.

    This function was previously decorated with ``@pytest.fixture``, so pytest
    treated it as a fixture and never collected it as a test. It is now a
    plain test, retried with exponential backoff on ``ResourceExhausted``.

    Args:
        monkeypatch: pytest fixture used to override the sample's output URI
            for the duration of this test only.
    """
    output_uri = "gs://python-docs-samples-tests/"
    os.environ["GCS_OUTPUT_URI"] = output_uri
    # The sample module reads GCS_OUTPUT_URI into OUTPUT_URI once at import
    # time, and the import has already happened by the time this test runs.
    # Setting the environment variable alone therefore has no effect, so the
    # module constant is overridden as well.
    monkeypatch.setattr(batch_example, "OUTPUT_URI", output_uri)

    batch_prediction_job = batch_example.embed_text_batch()
    assert batch_prediction_job
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
# Copyright 2024 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# https://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from __future__ import annotations
16+
17+
# [START generativeaionvertexai_embedding_code_retrieval]
18+
from vertexai.language_models import TextEmbeddingInput, TextEmbeddingModel
19+
20+
MODEL_NAME = "text-embedding-preview-0815"
21+
DIMENSIONALITY = 256
22+
23+
24+
def embed_text(
    texts: list[str] | None = None,
    task: str = "CODE_RETRIEVAL_QUERY",
    model_name: str = "text-embedding-preview-0815",
    dimensionality: int | None = 256,
) -> list[list[float]]:
    """Embeds texts with a pre-trained, foundational model.

    Args:
        texts: Strings to embed. When omitted, a single sample query
            ("Retrieve a function that adds two numbers") is used.
        task: Task type attached to every ``TextEmbeddingInput``.
        model_name: Name passed to ``TextEmbeddingModel.from_pretrained``.
        dimensionality: Forwarded as ``output_dimensionality``. A falsy value
            (``None`` or ``0``) omits the argument from the request.

    Returns:
        The ``values`` list of each embedding returned by the model.
    """
    # A None sentinel replaces the former list literal default, so no list
    # object is shared between calls.
    if texts is None:
        texts = ["Retrieve a function that adds two numbers"]

    model = TextEmbeddingModel.from_pretrained(model_name)
    inputs = [TextEmbeddingInput(text, task) for text in texts]
    kwargs = {"output_dimensionality": dimensionality} if dimensionality else {}
    embeddings = model.get_embeddings(inputs, **kwargs)
    # Example response:
    # [[0.025890009477734566, -0.05553026497364044, 0.006374752148985863, ...], ...]
    return [embedding.values for embedding in embeddings]
38+
39+
40+
if __name__ == "__main__":
    # Embed the natural-language query describing the code to look for.
    # The variable is named for what it holds: the query side of retrieval.
    texts = ["Retrieve a function that adds two numbers"]
    task = "CODE_RETRIEVAL_QUERY"
    code_query_embeddings = embed_text(
        texts=texts, task=task, model_name=MODEL_NAME, dimensionality=DIMENSIONALITY
    )

    # Embed the candidate code blocks, i.e. the corpus the query is matched
    # against.
    texts = [
        "def func(a, b): return a + b",
        "def func(a, b): return a - b",
        "def func(a, b): return (a ** 2 + b ** 2) ** 0.5",
    ]
    task = "RETRIEVAL_DOCUMENT"
    code_block_embeddings = embed_text(
        texts=texts, task=task, model_name=MODEL_NAME, dimensionality=DIMENSIONALITY
    )
60+
61+
# [END generativeaionvertexai_embedding_code_retrieval]

0 commit comments

Comments
 (0)