|
47 | 47 | ],
|
48 | 48 | "source": [
|
49 | 49 | "# Project and BQ location are read from the environment (there is no default); multi-region US is where most of the BQ data lies, including the BQ public datasets\n",
|
50 |
| - "BQ_LOCATION = \"us\"\n", |
51 |
| - "PROJECT = \"bigframes-dev\"\n", |
| 50 | + "import os\n", |
| 51 | + "\n", |
| 52 | + "PROJECT_ID = os.environ.get(\"GOOGLE_CLOUD_PROJECT\")\n", |
| 53 | + "BQ_LOCATION = os.environ.get(\"BIGQUERY_LOCATION\")\n", |
| 54 | + "\n", |
| 55 | + "if not PROJECT_ID:\n", |
| 56 | + " raise ValueError(\"Project must be set via environment variable GOOGLE_CLOUD_PROJECT\")\n", |
| 57 | + "if not BQ_LOCATION:\n", |
| 58 | + " raise ValueError(\"BQ location must be set via environment variable BIGQUERY_LOCATION\")\n", |
| 59 | + "\n", |
52 | 60 | "DATASET = \"bigframes_testing\"\n",
|
53 | 61 | "PENGUINS_TABLE = \"bigquery-public-data.ml_datasets.penguins\"\n",
|
54 | 62 | "\n",
|
55 | 63 | "\n",
|
56 | 64 | "# Do location-specific setup if the location taken from the environment is not the multi-region US\n",
|
57 | 65 | "\n",
|
58 |
| - "import os\n", |
59 | 66 | "import google.api_core.exceptions\n",
|
60 | 67 | "from google.cloud import bigquery\n",
|
61 | 68 | "import bigframes\n",
|
62 |
| - " \n", |
63 |
| - "env_bq_location = os.getenv(\"BIGQUERY_LOCATION\")\n", |
64 |
| - "if env_bq_location and env_bq_location != BQ_LOCATION:\n", |
65 |
| - " BQ_LOCATION = env_bq_location.lower()\n", |
66 | 69 | "\n",
|
67 | 70 | "client = bigquery.Client()\n",
|
68 | 71 | "\n",
|
| 72 | + "BQ_LOCATION = BQ_LOCATION.lower()\n", |
69 | 73 | "if BQ_LOCATION != \"us\":\n",
|
70 | 74 | " bq_location_normalized = BQ_LOCATION.replace('-', '_')\n",
|
71 | 75 | "\n",
|
72 | 76 | " # Nominate a local penguins table\n",
|
73 | 77 | " penguins_table_ref = bigquery.TableReference.from_string(PENGUINS_TABLE)\n",
|
74 | 78 | " penguins_local_dataset_name = f\"{DATASET}_{bq_location_normalized}\"\n",
|
75 |
| - " penguins_local_dataset_ref = bigquery.DatasetReference(project=PROJECT, dataset_id=penguins_local_dataset_name)\n", |
| 79 | + " penguins_local_dataset_ref = bigquery.DatasetReference(project=PROJECT_ID, dataset_id=penguins_local_dataset_name)\n", |
76 | 80 | " penguins_local_dataset = bigquery.Dataset(penguins_local_dataset_ref)\n",
|
77 | 81 | " penguins_local_dataset.location = BQ_LOCATION\n",
|
78 | 82 | " penguins_local_table_ref= bigquery.TableReference(penguins_local_dataset, penguins_table_ref.table_id)\n",
|
|
94 | 98 | " DATASET = f\"{DATASET}_{bq_location_normalized}\"\n",
|
95 | 99 | "\n",
|
96 | 100 | "# Create the dataset to store the model if it doesn't exist \n",
|
97 |
| - "model_local_dataset = bigquery.Dataset(bigquery.DatasetReference(project=PROJECT, dataset_id=DATASET))\n", |
| 101 | + "model_local_dataset = bigquery.Dataset(bigquery.DatasetReference(project=PROJECT_ID, dataset_id=DATASET))\n", |
98 | 102 | "model_local_dataset.location = BQ_LOCATION\n",
|
99 | 103 | "model_dataset = client.create_dataset(model_local_dataset, exists_ok=True)\n",
|
100 | 104 | "\n",
|
101 | 105 | "# Finally log the variables driving the core notebook execution\n",
|
102 | 106 | "log = ('\\n'.join(f\"{name}: {str(value)}\" for name, value in {\n",
|
103 |
| - " \"BigQuery project\" : PROJECT,\n", |
| 107 | + " \"BigQuery project\" : PROJECT_ID,\n", |
104 | 108 | " \"BigQuery location\" : BQ_LOCATION,\n",
|
105 | 109 | " \"Penguins Table\" : PENGUINS_TABLE,\n",
|
106 | 110 | " \"ML Model Dataset\" : model_dataset.reference\n",
|
|
134 | 138 | "\n",
|
135 | 139 | "# Note: The project option is not required in all environments.\n",
|
136 | 140 | "# On BigQuery Studio, the project ID is automatically detected.\n",
|
137 |
| - "bigframes.pandas.options.bigquery.project = PROJECT\n", |
| 141 | + "bigframes.pandas.options.bigquery.project = PROJECT_ID\n", |
138 | 142 | "\n",
|
139 | 143 | "# Note: The location option is not required.\n",
|
140 | 144 | "# It defaults to the location of the first table or query\n",
|
|
0 commit comments