
Commit 1e8a2f1

chore!: Remove attach_logprobs parameter from AI operations (#1816)
* Refactor: Remove attach_logprobs parameter from AI operations

This commit removes the `attach_logprobs` parameter from the `filter`, `map`, `classify`, and `join` methods within the `AIAccessor` class in `bigframes/operations/ai.py`. The associated logic for calculating and attaching the 'logprob' column has also been removed from the `map` method.

System tests in `tests/system/large/operations/test_ai.py` that specifically tested the `attach_logprobs` functionality have been updated by:

- Removing the `attach_logprobs=True` argument from method calls.
- Removing assertions for the 'logprob' column.
- Renaming the test methods to reflect their updated scope (e.g., `test_filter_attach_logprob` to `test_filter_functionality_formerly_attach_logprob`).

The small system tests and experimental notebooks were not affected, as they did not use this parameter.

* polish tests

---------

Co-authored-by: google-labs-jules[bot] <161369871+google-labs-jules[bot]@users.noreply.github.com>
1 parent dc9eb27 commit 1e8a2f1
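Because the `!` in the commit title marks this as a breaking change, a minimal before/after sketch of the call-site impact may help. Everything here is illustrative rather than part of the commit: `gemini_flash_model` stands in for a configured `bigframes.ml.llm.GeminiTextGenerator` (the model name is an assumption), the example data mirrors the deleted filter test, and the experimental AI-operator options are assumed to be enabled as in the system tests.

    import bigframes.pandas as bpd
    from bigframes.ml import llm

    # Placeholder model; any Gemini text model accepted by the AI operators works here.
    gemini_flash_model = llm.GeminiTextGenerator(model_name="gemini-2.0-flash-001")

    df = bpd.DataFrame({"number_1": [1, 2], "number_2": [2, 1]})

    # Before this commit, an extra "logprob" column could be requested:
    #     df.ai.filter("{number_1} is greater than {number_2}", gemini_flash_model,
    #                  attach_logprobs=True)
    #
    # After this commit the keyword is gone, so passing it raises a TypeError;
    # the call simply returns the filtered rows with no "logprob" column attached.
    filtered = df.ai.filter("{number_1} is greater than {number_2}", gemini_flash_model)

The same removal applies to `map`, `classify`, and `join`.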

File tree: 2 files changed (+0, -116 lines)

bigframes/operations/ai.py

Lines changed: 0 additions & 38 deletions
@@ -41,7 +41,6 @@ def filter(
         instruction: str,
         model,
         ground_with_google_search: bool = False,
-        attach_logprobs: bool = False,
     ):
         """
         Filters the DataFrame with the semantics of the user instruction.
@@ -82,10 +81,6 @@ def filter(
                 page for details: https://cloud.google.com/vertex-ai/generative-ai/pricing#google_models
                 The default is `False`.
 
-            attach_logprobs (bool, default False):
-                Controls whether to attach an additional "logprob" column for each result. Logprobs are float-point values reflecting the confidence level
-                of the LLM for their responses. Higher values indicate more confidence. The value is in the range between negative infinite and 0.
-
         Returns:
             bigframes.pandas.DataFrame: DataFrame filtered by the instruction.
 
@@ -103,7 +98,6 @@ def filter(
             model,
             output_schema,
             ground_with_google_search,
-            attach_logprobs,
         )
 
         return result[result[answer_col]].drop(answer_col, axis=1)
@@ -114,7 +108,6 @@ def map(
         model,
         output_schema: Dict[str, str] | None = None,
         ground_with_google_search: bool = False,
-        attach_logprobs=False,
     ):
         """
         Maps the DataFrame with the semantics of the user instruction. The name of the keys in the output_schema parameter carry
@@ -180,11 +173,6 @@ def map(
                 page for details: https://cloud.google.com/vertex-ai/generative-ai/pricing#google_models
                 The default is `False`.
 
-            attach_logprobs (bool, default False):
-                Controls whether to attach an additional "logprob" column for each result. Logprobs are float-point values reflecting the confidence level
-                of the LLM for their responses. Higher values indicate more confidence. The value is in the range between negative infinite and 0.
-
-
         Returns:
             bigframes.pandas.DataFrame: DataFrame with attached mapping results.
 
@@ -258,19 +246,6 @@ def map(
 
         attach_columns = [results[col] for col, _ in output_schema.items()]
 
-        def extract_logprob(s: bigframes.series.Series) -> bigframes.series.Series:
-            from bigframes import bigquery as bbq
-
-            logprob_jsons = bbq.json_extract_array(s, "$.candidates").list[0]
-            logprobs = bbq.json_extract(logprob_jsons, "$.avg_logprobs").astype(
-                "Float64"
-            )
-            logprobs.name = "logprob"
-            return logprobs
-
-        if attach_logprobs:
-            attach_columns.append(extract_logprob(results["full_response"]))
-
         from bigframes.core.reshape.api import concat
 
         return concat([self._df, *attach_columns], axis=1)
@@ -282,7 +257,6 @@ def classify(
         labels: Sequence[str],
         output_column: str = "result",
         ground_with_google_search: bool = False,
-        attach_logprobs=False,
     ):
         """
         Classifies the rows of dataframes based on user instruction into the provided labels.
@@ -337,11 +311,6 @@ def classify(
                 page for details: https://cloud.google.com/vertex-ai/generative-ai/pricing#google_models
                 The default is `False`.
 
-            attach_logprobs (bool, default False):
-                Controls whether to attach an additional "logprob" column for each result. Logprobs are float-point values reflecting the confidence level
-                of the LLM for their responses. Higher values indicate more confidence. The value is in the range between negative infinite and 0.
-
-
         Returns:
             bigframes.pandas.DataFrame: DataFrame with classification result.
 
@@ -367,7 +336,6 @@ def classify(
             model,
             output_schema={output_column: "string"},
             ground_with_google_search=ground_with_google_search,
-            attach_logprobs=attach_logprobs,
         )
 
     def join(
@@ -376,7 +344,6 @@ def join(
         instruction: str,
         model,
         ground_with_google_search: bool = False,
-        attach_logprobs=False,
     ):
         """
         Joines two dataframes by applying the instruction over each pair of rows from
@@ -428,10 +395,6 @@ def join(
                 page for details: https://cloud.google.com/vertex-ai/generative-ai/pricing#google_models
                 The default is `False`.
 
-            attach_logprobs (bool, default False):
-                Controls whether to attach an additional "logprob" column for each result. Logprobs are float-point values reflecting the confidence level
-                of the LLM for their responses. Higher values indicate more confidence. The value is in the range between negative infinite and 0.
-
         Returns:
             bigframes.pandas.DataFrame: The joined dataframe.
 
@@ -510,7 +473,6 @@ def join(
             instruction,
            model,
             ground_with_google_search=ground_with_google_search,
-            attach_logprobs=attach_logprobs,
         ).reset_index(drop=True)
 
     def search(
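Beyond the signature cleanup, the only functional code removed above is the `extract_logprob` helper. For reference, its JSON-path logic (copied verbatim from the deleted lines) can still be expressed as a standalone function if a workflow happens to have the raw `full_response` prediction column in hand; whether that column is exposed to callers after this change is an assumption, not something the diff establishes.

    import bigframes.series
    from bigframes import bigquery as bbq


    def extract_logprob(full_response: bigframes.series.Series) -> bigframes.series.Series:
        # Mirrors the helper deleted from AIAccessor.map: pulls the first candidate's
        # avg_logprobs out of the raw Gemini prediction JSON and names it "logprob".
        candidates = bbq.json_extract_array(full_response, "$.candidates").list[0]
        logprobs = bbq.json_extract(candidates, "$.avg_logprobs").astype("Float64")
        logprobs.name = "logprob"
        return logprobs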

tests/system/large/operations/test_ai.py

Lines changed: 0 additions & 78 deletions
@@ -66,31 +66,6 @@ def test_filter(session, gemini_flash_model):
     )
 
 
-def test_filter_attach_logprob(session, gemini_flash_model):
-    df = dataframe.DataFrame(
-        data={
-            "number_1": [1, 2],
-            "number_2": [2, 1],
-            "col": [0, 0],
-        },
-        session=session,
-    )
-
-    with bigframes.option_context(
-        AI_OP_EXP_OPTION,
-        True,
-        THRESHOLD_OPTION,
-        10,
-    ):
-        actual_df = df.ai.filter(
-            "{number_1} is greater than {number_2}",
-            gemini_flash_model,
-            attach_logprobs=True,
-        ).to_pandas()
-
-    assert "logprob" in actual_df.columns
-
-
 def test_filter_multi_model(session, gemini_flash_model):
     with bigframes.option_context(
         AI_OP_EXP_OPTION,
@@ -259,31 +234,6 @@ def test_map(session, gemini_flash_model, output_schema, output_col):
     )
 
 
-def test_map_attach_logprob(session, gemini_flash_model):
-    df = dataframe.DataFrame(
-        data={
-            "ingredient_1": ["Burger Bun", "Soy Bean"],
-            "ingredient_2": ["Beef Patty", "Bittern"],
-            "gluten-free": [True, True],
-        },
-        session=session,
-    )
-
-    with bigframes.option_context(
-        AI_OP_EXP_OPTION,
-        True,
-        THRESHOLD_OPTION,
-        10,
-    ):
-        actual_df = df.ai.map(
-            "What is the {gluten-free} food made from {ingredient_1} and {ingredient_2}? One word only.",
-            gemini_flash_model,
-            attach_logprobs=True,
-        ).to_pandas()
-
-    assert "logprob" in actual_df.columns
-
-
 def test_map_multimodel(session, gemini_flash_model):
     with bigframes.option_context(
         AI_OP_EXP_OPTION,
@@ -478,34 +428,6 @@ def test_join(instruction, session, gemini_flash_model):
     )
 
 
-def test_join_attach_logprob(session, gemini_flash_model):
-    cities = dataframe.DataFrame(
-        data={
-            "city": ["Seattle", "Berlin"],
-        },
-        session=session,
-    )
-    countries = dataframe.DataFrame(
-        data={"country": ["USA", "UK", "Germany"]},
-        session=session,
-    )
-
-    with bigframes.option_context(
-        AI_OP_EXP_OPTION,
-        True,
-        THRESHOLD_OPTION,
-        10,
-    ):
-        actual_df = cities.ai.join(
-            countries,
-            "{city} is in {country}",
-            gemini_flash_model,
-            attach_logprobs=True,
-        ).to_pandas()
-
-    assert "logprob" in actual_df.columns
-
-
 @pytest.mark.parametrize(
     ("reply"),
     [
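The three deleted tests asserted only on the "logprob" column, so they were dropped outright rather than kept under the renamed form the commit message describes, and the unchanged `test_filter`, `test_map`, and `test_join` already cover the default path. Purely as an illustration of what the deleted filter test reduces to once the parameter and the logprob assertion are gone, a trimmed version could look like the sketch below; the test name is hypothetical, and the option constants and fixtures are the ones this module already uses.

    def test_filter_without_logprob(session, gemini_flash_model):
        df = dataframe.DataFrame(
            data={
                "number_1": [1, 2],
                "number_2": [2, 1],
                "col": [0, 0],
            },
            session=session,
        )

        with bigframes.option_context(
            AI_OP_EXP_OPTION,
            True,
            THRESHOLD_OPTION,
            10,
        ):
            actual_df = df.ai.filter(
                "{number_1} is greater than {number_2}",
                gemini_flash_model,
            ).to_pandas()

        # No logprob attachment anymore: only the original columns should come back.
        assert "logprob" not in actual_df.columns
        assert set(actual_df.columns) == {"number_1", "number_2", "col"}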

Comments (0)