Skip to content

Commit 6a42b77

Browse files
authored
Merge pull request GoogleCloudPlatform#147 from enakai00/model_serving_update
Update Notebooks for model_serving/caip-load-testing
2 parents 71c45ad + 4157d6c commit 6a42b77

File tree

6 files changed

+84
-54
lines changed

6 files changed

+84
-54
lines changed

model_serving/caip-load-testing/01-prepare-and-deploy.ipynb

Lines changed: 19 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
"source": [
2323
"## Setup\n",
2424
"\n",
25-
"This Notebook was tested on **AI Platform Notebooks** using the standard TF 2.2 image."
25+
"This Notebook was tested on **AI Platform Notebooks** using the standard TF 2.8 image."
2626
]
2727
},
2828
{
@@ -72,9 +72,7 @@
7272
"GCS_MODEL_LOCATION = 'gs://{}/models/{}/{}'.format(BUCKET, MODEL_NAME, MODEL_VERSION)\n",
7373
"THUB_MODEL_HANDLE = 'https://tfhub.dev/google/imagenet/resnet_v2_101/classification/4'\n",
7474
"IMAGENET_LABELS_URL = 'https://storage.googleapis.com/download.tensorflow.org/data/ImageNetLabels.txt'\n",
75-
"IMAGES_FOLDER = 'test_images'\n",
76-
"\n",
77-
"!gcloud config set project $PROJECT_ID"
75+
"IMAGES_FOLDER = 'test_images'"
7876
]
7977
},
8078
{
@@ -550,7 +548,7 @@
550548
"source": [
551549
"!gcloud ai-platform models create {MODEL_NAME} \\\n",
552550
" --project {PROJECT_ID} \\\n",
553-
" --regions {REGION}"
551+
" --region {REGION}"
554552
]
555553
},
556554
{
@@ -559,7 +557,9 @@
559557
"metadata": {},
560558
"outputs": [],
561559
"source": [
562-
"!gcloud ai-platform models list --project {PROJECT_ID} "
560+
"!gcloud ai-platform models list \\\n",
561+
" --project {PROJECT_ID} \\\n",
562+
" --region {REGION}"
563563
]
564564
},
565565
{
@@ -581,12 +581,13 @@
581581
"!gcloud beta ai-platform versions create {MODEL_VERSION} \\\n",
582582
" --model={MODEL_NAME} \\\n",
583583
" --origin={GCS_MODEL_LOCATION} \\\n",
584-
" --runtime-version=2.1 \\\n",
584+
" --runtime-version=2.8 \\\n",
585585
" --framework=TENSORFLOW \\\n",
586586
" --python-version=3.7 \\\n",
587587
" --machine-type={MACHINE_TYPE} \\\n",
588588
" --accelerator={ACCELERATOR} \\\n",
589-
" --project={PROJECT_ID}"
589+
" --project={PROJECT_ID} \\\n",
590+
" --region={REGION}"
590591
]
591592
},
592593
{
@@ -595,7 +596,8 @@
595596
"metadata": {},
596597
"outputs": [],
597598
"source": [
598-
"!gcloud ai-platform versions list --model={MODEL_NAME} --project={PROJECT_ID}"
599+
"!gcloud ai-platform versions list \\\n",
600+
" --model={MODEL_NAME} --project={PROJECT_ID} --region={REGION}"
599601
]
600602
},
601603
{
@@ -612,8 +614,14 @@
612614
"outputs": [],
613615
"source": [
614616
"import googleapiclient.discovery\n",
615-
"\n",
616-
"service = googleapiclient.discovery.build('ml', 'v1')\n",
617+
"from google.api_core.client_options import ClientOptions\n",
618+
"\n",
619+
"prefix = '{}-ml'.format(REGION) if REGION else 'ml'\n",
620+
"api_endpoint = 'https://{}.googleapis.com'.format(prefix)\n",
621+
"client_options = ClientOptions(api_endpoint=api_endpoint)\n",
622+
"service = googleapiclient.discovery.build('ml', 'v1',\n",
623+
" cache_discovery=False,\n",
624+
" client_options=client_options)\n",
617625
"name = 'projects/{}/models/{}/versions/{}'.format(PROJECT_ID, MODEL_NAME, MODEL_VERSION)\n",
618626
"print(\"Service name: {}\".format(name))\n",
619627
"\n",

model_serving/caip-load-testing/02-perf-testing.ipynb

Lines changed: 34 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -19,14 +19,16 @@
1919
"metadata": {},
2020
"source": [
2121
"## Setup\n",
22-
"This notebook was tested on **AI Platform Notebooks** using the standard TF 2.2 image."
22+
"This notebook was tested on **AI Platform Notebooks** using the standard TF 2.8 image."
2323
]
2424
},
2525
{
2626
"cell_type": "markdown",
2727
"metadata": {},
2828
"source": [
29-
"### Install required packages"
29+
"### Install required packages\n",
30+
"\n",
31+
"You can safely ignore the dependency errors. Confirm the last message starting with \"Successfully installed...\""
3032
]
3133
},
3234
{
@@ -35,7 +37,10 @@
3537
"metadata": {},
3638
"outputs": [],
3739
"source": [
38-
"%pip install -q -U locust google-cloud-monitoring google-cloud-logging google-cloud-monitoring-dashboards"
40+
"!pip install --user locust==2.11.1\\\n",
41+
" google-cloud-monitoring==2.11.1\\\n",
42+
" google-cloud-logging==3.2.2\\\n",
43+
" google-cloud-monitoring-dashboards==2.7.2"
3944
]
4045
},
4146
{
@@ -80,11 +85,11 @@
8085
"from google.api_core.exceptions import GoogleAPICallError \n",
8186
"\n",
8287
"from google.cloud import logging_v2\n",
83-
"from google.cloud.logging_v2 import MetricsServiceV2Client\n",
84-
"from google.cloud.logging_v2 import LoggingServiceV2Client\n",
88+
"from google.cloud.logging_v2.services.metrics_service_v2 import MetricsServiceV2Client\n",
89+
"from google.cloud.logging_v2.services.logging_service_v2 import LoggingServiceV2Client\n",
8590
"\n",
86-
"from google.cloud.monitoring_dashboard.v1.types import Dashboard\n",
87-
"from google.cloud.monitoring_dashboard.v1 import DashboardsServiceClient\n",
91+
"from google.cloud.monitoring_dashboard_v1.types import Dashboard\n",
92+
"from google.cloud.monitoring_dashboard_v1 import DashboardsServiceClient\n",
8893
"from google.cloud.monitoring_v3 import MetricServiceClient\n",
8994
"from google.cloud.monitoring_v3.query import Query\n",
9095
"from google.cloud.monitoring_v3.types import TimeInterval\n",
@@ -160,7 +165,7 @@
160165
" value_field:str, \n",
161166
" bucket_bounds:List[int]):\n",
162167
" \n",
163-
" metric_path = logging_client.metric_path(PROJECT_ID, metric_name)\n",
168+
" metric_path = logging_client.log_metric_path(PROJECT_ID, metric_name)\n",
164169
" log_entry_filter = 'resource.type=global AND logName={}'.format(log_path)\n",
165170
" \n",
166171
" metric_descriptor = {\n",
@@ -203,7 +208,11 @@
203208
" logging_client.get_log_metric(metric_path)\n",
204209
" print('Metric: {} already exists'.format(metric_path))\n",
205210
" except:\n",
206-
" logging_client.create_log_metric(parent, metric)\n",
211+
" request = logging_v2.types.logging_metrics.CreateLogMetricRequest(\n",
212+
" parent=parent,\n",
213+
" metric=metric,\n",
214+
" )\n",
215+
" logging_client.create_log_metric(request)\n",
207216
" print('Created metric {}'.format(metric_path))"
208217
]
209218
},
@@ -225,7 +234,7 @@
225234
"creds , _ = google.auth.default()\n",
226235
"logging_client = MetricsServiceV2Client(credentials=creds)\n",
227236
"\n",
228-
"parent = logging_client.project_path(PROJECT_ID)\n",
237+
"parent = logging_client.common_project_path(PROJECT_ID)\n",
229238
"log_path = LoggingServiceV2Client.log_path(PROJECT_ID, log_name)"
230239
]
231240
},
@@ -284,12 +293,13 @@
284293
"metadata": {},
285294
"outputs": [],
286295
"source": [
287-
"metrics = logging_client.list_log_metrics(parent)\n",
296+
"request = {'parent': parent}\n",
297+
"metrics = logging_client.list_log_metrics(request)\n",
288298
"\n",
289299
"if not list(metrics):\n",
290300
"    print(\"There are not any log based metrics defined in the project\")\n",
291301
"else:\n",
292-
" for element in logging_client.list_log_metrics(parent):\n",
302+
" for element in logging_client.list_log_metrics(request):\n",
293303
" print(element.metric_descriptor.name)"
294304
]
295305
},
@@ -337,8 +347,12 @@
337347
"outputs": [],
338348
"source": [
339349
"dashboard_proto = Dashboard()\n",
340-
"dashboard_proto = ParseDict(dashboard_template, dashboard_proto)\n",
341-
"dashboard = dashboard_service_client.create_dashboard(parent, dashboard_proto)"
350+
"request = {\n",
351+
" 'parent': parent,\n",
352+
" 'dashboard': dashboard_proto,\n",
353+
"}\n",
354+
"dashboard_proto = ParseDict(dashboard_template, dashboard_proto._pb)\n",
355+
"dashboard = dashboard_service_client.create_dashboard(request)"
342356
]
343357
},
344358
{
@@ -347,7 +361,7 @@
347361
"metadata": {},
348362
"outputs": [],
349363
"source": [
350-
"for dashboard in dashboard_service_client.list_dashboards(parent):\n",
364+
"for dashboard in dashboard_service_client.list_dashboards({'parent': parent}):\n",
351365
" print('Dashboard name: {}, Dashboard ID: {}'.format(dashboard.display_name, dashboard.name))"
352366
]
353367
},
@@ -357,7 +371,7 @@
357371
"source": [
358372
"## 3. Deploying Locust to a GKE cluster\n",
359373
"\n",
360-
"Before proceeding, you need access to a GKE cluster. The described deployment process can deploy Locust to any GKE cluster as long as there are enough compute resources to support your Locust configuration. The default configuration follows the Locust's best practices and requests one processor core and 4Gi of memory for the Locust master and one processor core and 2Gi of memory for each Locust worker. As you run your tests, it is important to monitor the the master and the workers for resource utilization and fine tune the allocated resources as required.\n",
374+
"Before proceeding, you need access to a GKE cluster. You can find a command to create a GKE cluster in [Environment setup](https://github.com/GoogleCloudPlatform/mlops-on-gcp/blob/master/model_serving/caip-load-testing/README.md#environment-setup) section of [README.md](https://github.com/GoogleCloudPlatform/mlops-on-gcp/blob/master/model_serving/caip-load-testing/README.md). The described deployment process can deploy Locust to any GKE cluster as long as there are enough compute resources to support your Locust configuration. The default configuration follows Locust's best practices and requests one processor core and 4Gi of memory for the Locust master and one processor core and 2Gi of memory for each Locust worker. As you run your tests, it is important to monitor the master and the workers for resource utilization and fine tune the allocated resources as required.\n",
361375
"\n",
362376
"The deployment process has been streamlined using [Kustomize](https://kustomize.io/). As described in the following steps, you can fine tune the baseline configuration by modifying the default `kustomization.yaml` and `patch.yaml` files in the `locust/manifests` folder.\n",
363377
"\n"
@@ -623,10 +637,10 @@
623637
"source": [
624638
"You can try using the following parameter configurations:\n",
625639
"1. Number of total users to simulate: 152\n",
626-
"2. Hatch rate: 1\n",
627-
"3. Host: http://ml.googleapis.com\n",
628-
"4. Number of users to increase by step: 8\n",
629-
"5. Step duration: 1m "
640+
"2. Spawn rate: 1\n",
641+
"3. Host: `http://[your-region]-ml.googleapis.com`\n",
642+
"\n",
643+
"**NOTE**: `[your-region]` is the region for deploying the model that you configured as `REGION` in the first notebook. "
630644
]
631645
},
632646
{

model_serving/caip-load-testing/03-analyze-results.ipynb

Lines changed: 17 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -51,8 +51,8 @@
5151
"import google.auth\n",
5252
"\n",
5353
"from google.cloud import logging_v2\n",
54-
"from google.cloud.monitoring_dashboard.v1 import DashboardsServiceClient\n",
55-
"from google.cloud.logging_v2 import MetricsServiceV2Client\n",
54+
"from google.cloud.monitoring_dashboard_v1 import DashboardsServiceClient\n",
55+
"from google.cloud.logging_v2.services.metrics_service_v2 import MetricsServiceV2Client\n",
5656
"from google.cloud.monitoring_v3.query import Query\n",
5757
"from google.cloud.monitoring_v3 import MetricServiceClient\n",
5858
"\n",
@@ -108,10 +108,11 @@
108108
"creds , _ = google.auth.default()\n",
109109
"client = MetricServiceClient(credentials=creds)\n",
110110
"\n",
111-
"project_path = client.project_path(PROJECT_ID)\n",
111+
"project_path = client.common_project_path(PROJECT_ID)\n",
112112
"filter = 'metric.type=starts_with(\"ml.googleapis.com/prediction\")'\n",
113113
"\n",
114-
"for descriptor in client.list_metric_descriptors(project_path, filter_=filter):\n",
114+
"request = {'name': project_path, 'filter': filter}\n",
115+
"for descriptor in client.list_metric_descriptors(request):\n",
115116
" print(descriptor.type)"
116117
]
117118
},
@@ -130,7 +131,8 @@
130131
"source": [
131132
"filter = 'metric.type=starts_with(\"logging.googleapis.com/user\")'\n",
132133
"\n",
133-
"for descriptor in client.list_metric_descriptors(project_path, filter_=filter):\n",
134+
"request = {'name': project_path, 'filter': filter}\n",
135+
"for descriptor in client.list_metric_descriptors(request):\n",
134136
" print(descriptor.type)"
135137
]
136138
},
@@ -354,7 +356,8 @@
354356
"metadata": {},
355357
"outputs": [],
356358
"source": [
357-
"latency_results = test_result[['Latency: model', 'Latency: client']]\n",
359+
"latency_results = test_result[\n",
360+
" [x[0] for x in test_result.columns if x[0].startswith('Latency:')]]\n",
358361
"latency_results.columns = latency_results.columns.get_level_values(0)\n",
359362
"ax = latency_results.plot(figsize=(14, 9), legend=True)\n",
360363
"ax.set_xlabel('Time', fontsize=16)\n",
@@ -377,12 +380,12 @@
377380
"metadata": {},
378381
"outputs": [],
379382
"source": [
380-
"throughput_results = test_result[['response_rate', 'User count']]\n",
383+
"throughput_results = test_result[['response_rate']]\n",
381384
"throughput_results.columns = throughput_results.columns.get_level_values(0)\n",
382385
"ax = throughput_results.plot(figsize=(14, 9), legend=True)\n",
383386
"ax.set_xlabel('Time', fontsize=16)\n",
384387
"ax.set_ylabel('Count', fontsize=16)\n",
385-
"_ = ax.set_title(\"Response Rate vs User Count\", fontsize=20)"
388+
"_ = ax.set_title(\"Response Rate\", fontsize=20)"
386389
]
387390
},
388391
{
@@ -399,11 +402,11 @@
399402
"outputs": [],
400403
"source": [
401404
"logging_client = MetricsServiceV2Client(credentials=creds)\n",
402-
"parent = logging_client.project_path(PROJECT_ID)\n",
405+
"parent = logging_client.common_project_path(PROJECT_ID)\n",
403406
"\n",
404-
"for element in logging_client.list_log_metrics(parent):\n",
405-
" metric_path = logging_client.metric_path(PROJECT_ID, element.name)\n",
406-
" logging_client.delete_log_metric(metric_path)\n",
407+
"for element in logging_client.list_log_metrics({'parent': parent}):\n",
408+
" metric_path = logging_client.log_metric_path(PROJECT_ID, element.name)\n",
409+
" logging_client.delete_log_metric({'metric_name': metric_path})\n",
407410
" print(\"Deleted metric: \", metric_path)"
408411
]
409412
},
@@ -416,9 +419,9 @@
416419
"display_name = 'AI Platform Prediction and Locust'\n",
417420
"dashboard_service_client = DashboardsServiceClient(credentials=creds)\n",
418421
"parent = 'projects/{}'.format(PROJECT_ID)\n",
419-
"for dashboard in dashboard_service_client.list_dashboards(parent):\n",
422+
"for dashboard in dashboard_service_client.list_dashboards({'parent': parent}):\n",
420423
" if dashboard.display_name == display_name:\n",
421-
" dashboard_service_client.delete_dashboard(dashboard.name)\n",
424+
" dashboard_service_client.delete_dashboard({'name': dashboard.name})\n",
422425
" print(\"Deleted dashboard:\", dashboard.name)"
423426
]
424427
},

model_serving/caip-load-testing/README.md

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -68,9 +68,14 @@ In addition to the Notebooks, the directory includes the following artifacts:
6868

6969
## Environment setup
7070

71-
1. Create a [Cloud Storage bucket](https://cloud.google.com/storage/docs/creating-buckets).
72-
2. Create a [Cloud Monitoring Workspace](https://cloud.google.com/monitoring/workspaces/create) in your project.
73-
3. Create a [Google Kubernetes Engine](https://cloud.google.com/kubernetes-engine/docs/how-to/creating-a-cluster) cluster with the required CPUs.
71+
1. Enable the following APIs
72+
- Kubernetes Engine API
73+
- Cloud Build API
74+
- AI Platform Training & Prediction API
75+
- Notebooks API
76+
2. Create a [Cloud Storage bucket](https://cloud.google.com/storage/docs/creating-buckets).
77+
3. Create a [Cloud Monitoring Workspace](https://cloud.google.com/monitoring/workspaces/create) in your project.
78+
4. Create a [Google Kubernetes Engine](https://cloud.google.com/kubernetes-engine/docs/how-to/creating-a-cluster) cluster with the required CPUs.
7479
The node pool must have access to the Cloud APIs.
7580
```
7681
PROJECT_ID=[YOUR-GCP-PROJECT-ID]
@@ -79,15 +84,15 @@ The node pool must have access to the Cloud APIs.
7984
MACHINE_TYPE=n1-standard-8
8085
SIZE=5
8186

82-
gcloud beta container --project=$PROJECT clusters create $CLUSTER_NAME \
87+
gcloud beta container --project=$PROJECT_ID clusters create $CLUSTER_NAME \
8388
--zone=$ZONE \
8489
--machine-type=$MACHINE_TYPE \
8590
--num-nodes=$SIZE \
8691
--scopes=cloud-platform
8792
```
88-
4. Create an [AI Notebooks instance](https://cloud.google.com/ai-platform/notebooks/docs/create-new) TensorFlow 2.2.
89-
5. Open the JupyterLab from the AI Notebook instance.
90-
6. Open a new Terminal to execute the following commands to clone the repository:
93+
5. Create an [AI Notebooks instance](https://cloud.google.com/ai-platform/notebooks/docs/create-new) TensorFlow 2.8.
94+
6. Open the JupyterLab from the AI Notebook instance.
95+
7. Open a new Terminal to execute the following commands to clone the repository:
9196
```
9297
git clone https://github.com/GoogleCloudPlatform/mlops-on-gcp
9398
cd mlops-on-gcp/model_serving/caip-load-testing

model_serving/caip-load-testing/locust/locust-image/task.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@
3838
from google.api_core.exceptions import GoogleAPICallError
3939
from google.api_core.exceptions import RetryError
4040
from google.auth.transport.requests import AuthorizedSession
41-
from google.cloud.logging_v2 import LoggingServiceV2Client
41+
from google.cloud.logging_v2.services.logging_service_v2 import LoggingServiceV2Client
4242
from google.cloud.logging_v2.types import LogEntry
4343
from google.protobuf.timestamp_pb2 import Timestamp
4444
from google.protobuf.struct_pb2 import Struct

model_serving/caip-load-testing/locust/manifests/locust-master.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ spec:
3232
containers:
3333
- image: locustio/locust
3434
name: locust-master
35-
args: ["-f", "/tasks/task.py", "--master", "--step-load"]
35+
args: ["-f", "/tasks/task.py", "--master"]
3636
envFrom:
3737
- configMapRef:
3838
name: test-config-locations

0 commit comments

Comments (0)