Skip to content

Commit cce303b

Browse files
BenjaminKazemi and copybara-github
authored and committed
feat: GenAI SDK client(multimodal) - Support Assemble feature on the multimodal datasets.
PiperOrigin-RevId: 824635300
1 parent 6737a70 commit cce303b

File tree

5 files changed

+711
-139
lines changed

5 files changed

+711
-139
lines changed
Lines changed: 102 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,102 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
#
15+
# pylint: disable=protected-access,bad-continuation,missing-function-docstring
16+
17+
from tests.unit.vertexai.genai.replays import pytest_helper
18+
from vertexai._genai import types
19+
20+
import pytest
21+
22+
# GCS URI of the multimodal dataset metadata schema (version 1.0.0).
METADATA_SCHEMA_URI = (
    "gs://google-cloud-aiplatform/schema/dataset/metadata/multimodal_1.0.0.yaml"
)
# BigQuery table name; the public assemble tests expect the returned
# destination to start with "bq://" followed by this value.
BIGQUERY_TABLE_NAME = "vertex-sdk-dev.multimodal_dataset.test-table"
# Identifier passed as `name=` to the assemble calls in the tests below.
DATASET = "8810841321427173376"
27+
28+
29+
def test_assemple_dataset(client):
    """Check the private assemble call returns a dataset operation.

    Calls ``client.datasets._assemble_multimodal_dataset`` for ``DATASET``
    with a template config that maps the ``"question"`` field to
    ``"questionColumn"``, then asserts the result is a
    ``types.MultimodalDatasetOperation``.
    """
    # NOTE(review): the function name keeps the original "assemple" spelling;
    # replay recordings may be keyed by test name, so confirm before renaming.
    operation = client.datasets._assemble_multimodal_dataset(
        name=DATASET,
        gemini_request_read_config={
            "template_config": {
                "field_mapping": {"question": "questionColumn"},
            },
        },
    )
    assert isinstance(operation, types.MultimodalDatasetOperation)
39+
40+
41+
def test_assemple_dataset_public(client):
    """Check the public ``assemble`` call returns an assembled dataset.

    Calls ``client.datasets.assemble`` for ``DATASET`` with a
    ``GeminiTemplateConfig`` wrapping a single-turn user ``GeminiExample``,
    then asserts the result is a ``types.AssembleDataset`` whose
    ``bigquery_destination`` starts with ``bq://`` + ``BIGQUERY_TABLE_NAME``.
    """
    assemble_dataset = client.datasets.assemble(
        name=DATASET,
        template_config=types.GeminiTemplateConfig(
            gemini_example=types.GeminiExample(
                model="gemini-1.5-flash",
                contents=[
                    {
                        "role": "user",
                        # Plain string, not an f-string: the literal "{name}"
                        # text is sent as-is (presumably a template
                        # placeholder -- confirm against the service docs).
                        "parts": [{"text": "What is the capital of {name}?"}],
                    }
                ],
            ),
        ),
    )
    assert isinstance(assemble_dataset, types.AssembleDataset)
    # Only the prefix is checked, so any suffix on the destination is accepted.
    assert assemble_dataset.bigquery_destination.startswith(
        f"bq://{BIGQUERY_TABLE_NAME}"
    )
60+
61+
62+
# Module-level pytest marks built by the shared replay-test helper, which is
# given this file's path and its global namespace.
pytestmark = pytest_helper.setup(
    file=__file__,
    globals_for_file=globals(),
)


# Load the pytest-asyncio plugin; the tests below use @pytest.mark.asyncio.
pytest_plugins = ("pytest_asyncio",)
68+
69+
70+
@pytest.mark.asyncio
async def test_assemple_dataset_async(client):
    """Check the async private assemble call returns a dataset operation.

    Awaits ``client.aio.datasets._assemble_multimodal_dataset`` for
    ``DATASET`` with a template config that maps the ``"question"`` field to
    ``"questionColumn"``, then asserts the result is a
    ``types.MultimodalDatasetOperation``.
    """
    # NOTE(review): the function name keeps the original "assemple" spelling;
    # replay recordings may be keyed by test name, so confirm before renaming.
    operation = await client.aio.datasets._assemble_multimodal_dataset(
        name=DATASET,
        gemini_request_read_config={
            "template_config": {
                "field_mapping": {"question": "questionColumn"},
            },
        },
    )
    assert isinstance(operation, types.MultimodalDatasetOperation)
81+
82+
83+
@pytest.mark.asyncio
async def test_assemple_dataset_public_async(client):
    """Check the async public ``assemble`` call returns an assembled dataset.

    Awaits ``client.aio.datasets.assemble`` for ``DATASET`` with a
    ``GeminiTemplateConfig`` wrapping a single-turn user ``GeminiExample``,
    then asserts the result is a ``types.AssembleDataset`` whose
    ``bigquery_destination`` starts with ``bq://`` + ``BIGQUERY_TABLE_NAME``.
    """
    assemble_dataset = await client.aio.datasets.assemble(
        name=DATASET,
        template_config=types.GeminiTemplateConfig(
            gemini_example=types.GeminiExample(
                model="gemini-1.5-flash",
                contents=[
                    {
                        "role": "user",
                        # Plain string, not an f-string: the literal "{name}"
                        # text is sent as-is (presumably a template
                        # placeholder -- confirm against the service docs).
                        "parts": [{"text": "What is the capital of {name}?"}],
                    }
                ],
            ),
        ),
    )
    assert isinstance(assemble_dataset, types.AssembleDataset)
    # Only the prefix is checked, so any suffix on the destination is accepted.
    assert assemble_dataset.bigquery_destination.startswith(
        f"bq://{BIGQUERY_TABLE_NAME}"
    )

tests/unit/vertexai/genai/replays/test_create_multimodal_datasets.py

Lines changed: 15 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -54,6 +54,9 @@ def test_create_dataset_from_bigquery(client):
5454
)
5555
assert isinstance(dataset, types.MultimodalDataset)
5656
assert dataset.display_name == "test-from-bigquery"
57+
assert dataset.metadata.input_config.bigquery_source.uri == (
58+
f"bq://{BIGQUERY_TABLE_NAME}"
59+
)
5760

5861

5962
def test_create_dataset_from_bigquery_without_bq_prefix(client):
@@ -70,6 +73,9 @@ def test_create_dataset_from_bigquery_without_bq_prefix(client):
7073
)
7174
assert isinstance(dataset, types.MultimodalDataset)
7275
assert dataset.display_name == "test-from-bigquery"
76+
assert dataset.metadata.input_config.bigquery_source.uri == (
77+
f"bq://{BIGQUERY_TABLE_NAME}"
78+
)
7379

7480

7581
pytestmark = pytest_helper.setup(
@@ -111,6 +117,9 @@ async def test_create_dataset_from_bigquery_async(client):
111117
)
112118
assert isinstance(dataset, types.MultimodalDataset)
113119
assert dataset.display_name == "test-from-bigquery"
120+
assert dataset.metadata.input_config.bigquery_source.uri == (
121+
f"bq://{BIGQUERY_TABLE_NAME}"
122+
)
114123

115124

116125
@pytest.mark.asyncio
@@ -129,6 +138,9 @@ async def test_create_dataset_from_bigquery_async_with_timeout(client):
129138
)
130139
assert isinstance(dataset, types.MultimodalDataset)
131140
assert dataset.display_name == "test-from-bigquery"
141+
assert dataset.metadata.input_config.bigquery_source.uri == (
142+
f"bq://{BIGQUERY_TABLE_NAME}"
143+
)
132144

133145

134146
@pytest.mark.asyncio
@@ -146,3 +158,6 @@ async def test_create_dataset_from_bigquery_async_without_bq_prefix(client):
146158
)
147159
assert isinstance(dataset, types.MultimodalDataset)
148160
assert dataset.display_name == "test-from-bigquery"
161+
assert dataset.metadata.input_config.bigquery_source.uri == (
162+
f"bq://{BIGQUERY_TABLE_NAME}"
163+
)

0 commit comments

Comments
 (0)