|
33 | 33 | "name": "stderr",
|
34 | 34 | "output_type": "stream",
|
35 | 35 | "text": [
|
36 |
| - "/usr/local/google/home/chelsealin/src/bigframes3/bigframes/_config/experiment_options.py:33: UserWarning: Semantic operators are still under experiments, and are subject to change in the future.\n", |
| 36 | + "/usr/local/google/home/chelsealin/src/bigframes/bigframes/_config/experiment_options.py:33: UserWarning: Semantic operators are still under experiments, and are subject to change in the future.\n", |
37 | 37 | " warnings.warn(\n"
|
38 | 38 | ]
|
39 | 39 | }
|
|
51 | 51 | },
|
52 | 52 | {
|
53 | 53 | "cell_type": "code",
|
54 |
| - "execution_count": 3, |
| 54 | + "execution_count": 4, |
55 | 55 | "metadata": {},
|
56 | 56 | "outputs": [
|
57 | 57 | {
|
58 |
| - "name": "stderr", |
59 |
| - "output_type": "stream", |
60 |
| - "text": [ |
61 |
| - "/usr/local/google/home/chelsealin/src/bigframes3/bigframes/pandas/__init__.py:559: DefaultLocationWarning: No explicit location is set, so using location US for the session.\n", |
62 |
| - " return global_session.get_global_session()\n" |
63 |
| - ] |
| 58 | + "data": { |
| 59 | + "text/html": [ |
| 60 | + "Query job 13e4b10e-70cf-4b93-8c59-5f6f5fb10aeb is DONE. 0 Bytes processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:13e4b10e-70cf-4b93-8c59-5f6f5fb10aeb&page=queryresults\">Open Job</a>" |
| 61 | + ], |
| 62 | + "text/plain": [ |
| 63 | + "<IPython.core.display.HTML object>" |
| 64 | + ] |
| 65 | + }, |
| 66 | + "metadata": {}, |
| 67 | + "output_type": "display_data" |
64 | 68 | },
|
65 | 69 | {
|
66 | 70 | "data": {
|
67 | 71 | "text/html": [
|
68 |
| - "Query job aef2dd7b-bdad-4dda-91be-867e8dac2613 is DONE. 0 Bytes processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:aef2dd7b-bdad-4dda-91be-867e8dac2613&page=queryresults\">Open Job</a>" |
| 72 | + "Query job 559dd42c-573d-4b00-8fe9-b7061afdd672 is DONE. 0 Bytes processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:559dd42c-573d-4b00-8fe9-b7061afdd672&page=queryresults\">Open Job</a>" |
69 | 73 | ],
|
70 | 74 | "text/plain": [
|
71 | 75 | "<IPython.core.display.HTML object>"
|
|
77 | 81 | ],
|
78 | 82 | "source": [
|
79 | 83 | "import bigframes.ml.llm as llm\n",
|
80 |
| - "gemini_model = llm.GeminiTextGenerator(model_name=llm._GEMINI_1P5_FLASH_001_ENDPOINT)" |
| 84 | + "gemini_model = llm.GeminiTextGenerator(model_name=llm._GEMINI_1P5_FLASH_001_ENDPOINT)\n", |
| 85 | + "text_embedding_model = llm.TextEmbeddingGenerator(model_name=\"text-embedding-004\")" |
81 | 86 | ]
|
82 | 87 | },
|
83 | 88 | {
|
|
657 | 662 | "## Semantic Search"
|
658 | 663 | ]
|
659 | 664 | },
|
660 |
| - { |
661 |
| - "cell_type": "code", |
662 |
| - "execution_count": 11, |
663 |
| - "metadata": {}, |
664 |
| - "outputs": [ |
665 |
| - { |
666 |
| - "data": { |
667 |
| - "text/html": [ |
668 |
| - "Query job 48aafee2-4948-4677-ab02-a94a71b9f6e2 is DONE. 0 Bytes processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:48aafee2-4948-4677-ab02-a94a71b9f6e2&page=queryresults\">Open Job</a>" |
669 |
| - ], |
670 |
| - "text/plain": [ |
671 |
| - "<IPython.core.display.HTML object>" |
672 |
| - ] |
673 |
| - }, |
674 |
| - "metadata": {}, |
675 |
| - "output_type": "display_data" |
676 |
| - } |
677 |
| - ], |
678 |
| - "source": [ |
679 |
| - "text_embedding_model = llm.TextEmbeddingGenerator(model_name=\"text-embedding-004\")" |
680 |
| - ] |
681 |
| - }, |
682 | 665 | {
|
683 | 666 | "cell_type": "code",
|
684 | 667 | "execution_count": 12,
|
|
1156 | 1139 | "agg_df = df.semantics.agg(\"Find the shared first name of actors in {Movies}. One word answer.\", model=gemini_model)\n",
|
1157 | 1140 | "agg_df"
|
1158 | 1141 | ]
|
| 1142 | + }, |
| 1143 | + { |
| 1144 | + "cell_type": "markdown", |
| 1145 | + "metadata": {}, |
| 1146 | + "source": [ |
| 1147 | + "## Semantic Cluster" |
| 1148 | + ] |
| 1149 | + }, |
| 1150 | + { |
| 1151 | + "cell_type": "code", |
| 1152 | + "execution_count": 5, |
| 1153 | + "metadata": {}, |
| 1154 | + "outputs": [ |
| 1155 | + { |
| 1156 | + "data": { |
| 1157 | + "text/html": [ |
| 1158 | + "Query job 92ce82b9-c521-42af-a2b7-6114b27a9ce4 is DONE. 0 Bytes processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:92ce82b9-c521-42af-a2b7-6114b27a9ce4&page=queryresults\">Open Job</a>" |
| 1159 | + ], |
| 1160 | + "text/plain": [ |
| 1161 | + "<IPython.core.display.HTML object>" |
| 1162 | + ] |
| 1163 | + }, |
| 1164 | + "metadata": {}, |
| 1165 | + "output_type": "display_data" |
| 1166 | + }, |
| 1167 | + { |
| 1168 | + "name": "stderr", |
| 1169 | + "output_type": "stream", |
| 1170 | + "text": [ |
| 1171 | + "/usr/local/google/home/chelsealin/src/bigframes/bigframes/core/__init__.py:112: PreviewWarning: Interpreting JSON column(s) as StringDtype. This behavior may change in future versions.\n", |
| 1172 | + " warnings.warn(\n" |
| 1173 | + ] |
| 1174 | + }, |
| 1175 | + { |
| 1176 | + "data": { |
| 1177 | + "text/html": [ |
| 1178 | + "Query job 8c4c7391-2889-4cf1-bbfa-5cbf6b144db5 is DONE. 10 Bytes processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:8c4c7391-2889-4cf1-bbfa-5cbf6b144db5&page=queryresults\">Open Job</a>" |
| 1179 | + ], |
| 1180 | + "text/plain": [ |
| 1181 | + "<IPython.core.display.HTML object>" |
| 1182 | + ] |
| 1183 | + }, |
| 1184 | + "metadata": {}, |
| 1185 | + "output_type": "display_data" |
| 1186 | + }, |
| 1187 | + { |
| 1188 | + "data": { |
| 1189 | + "text/html": [ |
| 1190 | + "Query job 19ae7cc6-3d61-4c69-9148-1956fafb577a is DONE. 30.8 kB processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:19ae7cc6-3d61-4c69-9148-1956fafb577a&page=queryresults\">Open Job</a>" |
| 1191 | + ], |
| 1192 | + "text/plain": [ |
| 1193 | + "<IPython.core.display.HTML object>" |
| 1194 | + ] |
| 1195 | + }, |
| 1196 | + "metadata": {}, |
| 1197 | + "output_type": "display_data" |
| 1198 | + }, |
| 1199 | + { |
| 1200 | + "data": { |
| 1201 | + "text/html": [ |
| 1202 | + "Query job 7c2b62df-3bed-4469-9ffc-131843efe25e is DONE. 30.7 kB processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:7c2b62df-3bed-4469-9ffc-131843efe25e&page=queryresults\">Open Job</a>" |
| 1203 | + ], |
| 1204 | + "text/plain": [ |
| 1205 | + "<IPython.core.display.HTML object>" |
| 1206 | + ] |
| 1207 | + }, |
| 1208 | + "metadata": {}, |
| 1209 | + "output_type": "display_data" |
| 1210 | + }, |
| 1211 | + { |
| 1212 | + "data": { |
| 1213 | + "text/html": [ |
| 1214 | + "Query job 74155e34-d8ca-4fba-8b93-33b1b325a5f1 is DONE. 138.9 kB processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:74155e34-d8ca-4fba-8b93-33b1b325a5f1&page=queryresults\">Open Job</a>" |
| 1215 | + ], |
| 1216 | + "text/plain": [ |
| 1217 | + "<IPython.core.display.HTML object>" |
| 1218 | + ] |
| 1219 | + }, |
| 1220 | + "metadata": {}, |
| 1221 | + "output_type": "display_data" |
| 1222 | + }, |
| 1223 | + { |
| 1224 | + "data": { |
| 1225 | + "text/html": [ |
| 1226 | + "Query job d9151043-a9c3-4388-8268-ef41162012b7 is DONE. 80 Bytes processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:d9151043-a9c3-4388-8268-ef41162012b7&page=queryresults\">Open Job</a>" |
| 1227 | + ], |
| 1228 | + "text/plain": [ |
| 1229 | + "<IPython.core.display.HTML object>" |
| 1230 | + ] |
| 1231 | + }, |
| 1232 | + "metadata": {}, |
| 1233 | + "output_type": "display_data" |
| 1234 | + }, |
| 1235 | + { |
| 1236 | + "data": { |
| 1237 | + "text/html": [ |
| 1238 | + "Query job d2c4ad9a-c637-490e-a2cf-37d7f5a34024 is DONE. 170 Bytes processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:d2c4ad9a-c637-490e-a2cf-37d7f5a34024&page=queryresults\">Open Job</a>" |
| 1239 | + ], |
| 1240 | + "text/plain": [ |
| 1241 | + "<IPython.core.display.HTML object>" |
| 1242 | + ] |
| 1243 | + }, |
| 1244 | + "metadata": {}, |
| 1245 | + "output_type": "display_data" |
| 1246 | + }, |
| 1247 | + { |
| 1248 | + "data": { |
| 1249 | + "text/html": [ |
| 1250 | + "<div>\n", |
| 1251 | + "<style scoped>\n", |
| 1252 | + " .dataframe tbody tr th:only-of-type {\n", |
| 1253 | + " vertical-align: middle;\n", |
| 1254 | + " }\n", |
| 1255 | + "\n", |
| 1256 | + " .dataframe tbody tr th {\n", |
| 1257 | + " vertical-align: top;\n", |
| 1258 | + " }\n", |
| 1259 | + "\n", |
| 1260 | + " .dataframe thead th {\n", |
| 1261 | + " text-align: right;\n", |
| 1262 | + " }\n", |
| 1263 | + "</style>\n", |
| 1264 | + "<table border=\"1\" class=\"dataframe\">\n", |
| 1265 | + " <thead>\n", |
| 1266 | + " <tr style=\"text-align: right;\">\n", |
| 1267 | + " <th></th>\n", |
| 1268 | + " <th>Product</th>\n", |
| 1269 | + " <th>Cluster ID</th>\n", |
| 1270 | + " </tr>\n", |
| 1271 | + " </thead>\n", |
| 1272 | + " <tbody>\n", |
| 1273 | + " <tr>\n", |
| 1274 | + " <th>0</th>\n", |
| 1275 | + " <td>Smartphone</td>\n", |
| 1276 | + " <td>3</td>\n", |
| 1277 | + " </tr>\n", |
| 1278 | + " <tr>\n", |
| 1279 | + " <th>1</th>\n", |
| 1280 | + " <td>Laptop</td>\n", |
| 1281 | + " <td>3</td>\n", |
| 1282 | + " </tr>\n", |
| 1283 | + " <tr>\n", |
| 1284 | + " <th>2</th>\n", |
| 1285 | + " <td>Coffee Maker</td>\n", |
| 1286 | + " <td>1</td>\n", |
| 1287 | + " </tr>\n", |
| 1288 | + " <tr>\n", |
| 1289 | + " <th>3</th>\n", |
| 1290 | + " <td>T-shirt</td>\n", |
| 1291 | + " <td>2</td>\n", |
| 1292 | + " </tr>\n", |
| 1293 | + " <tr>\n", |
| 1294 | + " <th>4</th>\n", |
| 1295 | + " <td>Jeans</td>\n", |
| 1296 | + " <td>2</td>\n", |
| 1297 | + " </tr>\n", |
| 1298 | + " </tbody>\n", |
| 1299 | + "</table>\n", |
| 1300 | + "<p>5 rows × 2 columns</p>\n", |
| 1301 | + "</div>[5 rows x 2 columns in total]" |
| 1302 | + ], |
| 1303 | + "text/plain": [ |
| 1304 | + " Product Cluster ID\n", |
| 1305 | + "0 Smartphone 3\n", |
| 1306 | + "1 Laptop 3\n", |
| 1307 | + "2 Coffee Maker 1\n", |
| 1308 | + "3 T-shirt 2\n", |
| 1309 | + "4 Jeans 2\n", |
| 1310 | + "\n", |
| 1311 | + "[5 rows x 2 columns]" |
| 1312 | + ] |
| 1313 | + }, |
| 1314 | + "execution_count": 5, |
| 1315 | + "metadata": {}, |
| 1316 | + "output_type": "execute_result" |
| 1317 | + } |
| 1318 | + ], |
| 1319 | + "source": [ |
| 1320 | + "df = bpd.DataFrame({'Product': ['Smartphone', 'Laptop', 'Coffee Maker', 'T-shirt', 'Jeans']})\n", |
| 1321 | + "\n", |
| 1322 | + "df.semantics.cluster_by(column='Product', output_column='Cluster ID', model=text_embedding_model, n=3)" |
| 1323 | + ] |
1159 | 1324 | }
|
1160 | 1325 | ],
|
1161 | 1326 | "metadata": {
|
|
1174 | 1339 | "name": "python",
|
1175 | 1340 | "nbconvert_exporter": "python",
|
1176 | 1341 | "pygments_lexer": "ipython3",
|
1177 |
| - "version": "3.11.9" |
| 1342 | + "version": "3.12.1" |
1178 | 1343 | }
|
1179 | 1344 | },
|
1180 | 1345 | "nbformat": 4,
|
|
0 commit comments