docs: add code snippets for llm text generatiion (#669)

ashleyxuu · web-flow · commit 93416ed2f835 · 2024-05-09T13:20:20.000-07:00
* docs: add code snippets for llm text generatiion
diff --git a/samples/snippets/text_generation_test.py b/samples/snippets/text_generation_test.py
@@ -0,0 +1,68 @@
+# Copyright 2024 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+def test_llm_text_generation() -> None:
+    # Determine project id, in this case prefer the one set in the environment
+    # variable GOOGLE_CLOUD_PROJECT (if any)
+    import os
+
+    PROJECT_ID = os.getenv("GOOGLE_CLOUD_PROJECT", "bigframes-dev")
+    LOCATION = "US"
+
+    # [START bigquery_dataframes_generate_text_tutorial_create_remote_model]
+    import bigframes
+    from bigframes.ml.llm import PaLM2TextGenerator
+
+    bigframes.options.bigquery.project = PROJECT_ID
+    bigframes.options.bigquery.location = LOCATION
+
+    model = PaLM2TextGenerator()
+    # [END bigquery_dataframes_generate_text_tutorial_create_remote_model]
+    assert model is not None
+
+    # [START bigquery_dataframes_generate_text_tutorial_perform_keyword_extraction]
+    import bigframes.pandas as bpd
+
+    df = bpd.read_gbq("bigquery-public-data.imdb.reviews", max_results=5)
+    df_prompt_prefix = "Extract the key words from the text below: "
+    df_prompt = df_prompt_prefix + df["review"]
+
+    # Predict using the model
+    df_pred = model.predict(df_prompt, temperature=0.2, max_output_tokens=100)
+    df_pred.peek(5)
+    # [END bigquery_dataframes_generate_text_tutorial_perform_keyword_extraction]
+    # peek() is used to show a preview of the results. If the output
+    # of this sample changes, also update the screenshot for the associated
+    # tutorial on cloud.google.com.
+    assert df_pred["ml_generate_text_llm_result"] is not None
+    assert df_pred["ml_generate_text_llm_result"].iloc[0] is not None
+
+    # [START bigquery_dataframes_generate_text_tutorial_perform_sentiment_analysis]
+    import bigframes.pandas as bpd
+
+    df = bpd.read_gbq("bigquery-public-data.imdb.reviews", max_results=5)
+    df_prompt_prefix = "perform sentiment analysis on the following text, return one the following categories: positive, negative: "
+    df_prompt = df_prompt_prefix + df["review"]
+
+    # Predict using the model
+    df_pred = model.predict(df_prompt, temperature=0.2, max_output_tokens=100)
+    df_pred.peek(5)
+    # [END bigquery_dataframes_generate_text_tutorial_perform_sentiment_analysis]
+    # peek() is used to show a preview of the results. If the output
+    # of this sample changes, also update the screenshot for the associated
+    # tutorial on cloud.google.com.
+
+    assert df_pred["ml_generate_text_llm_result"] is not None
+    assert df_pred["ml_generate_text_llm_result"].iloc[0] is not None