Skip to content

Commit c04308f

Browse files
committed
UPSTREAM: <carry>: add artifact storage api for backend
This change introduces a new API for artifact data retrieval in the mlpipeline API server, to be consumed by the UI and end users. Two new endpoints are introduced, allowing for 1) collection retrieval and 2) individual artifact metadata retrieval. When an individual artifact resource is retrieved, users or the UI can specify a share_url option that provides a pre-signed URL with a very short expiry time, which can be used as an artifact download link directly from the originating object store provider. This is currently only implemented for S3-compliant object store backends.
1 parent b775d3c commit c04308f

27 files changed

+2759
-80
lines changed

backend/api/v2beta1/artifacts.proto

Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
// Copyright 2024 The Kubeflow Authors
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// https://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
syntax = "proto3";
16+
17+
package kubeflow.pipelines.backend.api.v2beta1;
18+
19+
import "google/api/annotations.proto";
20+
import "google/api/httpbody.proto";
21+
import "google/rpc/status.proto";
22+
import "google/protobuf/timestamp.proto";
23+
24+
option go_package = "github.com/kubeflow/pipelines/backend/api/v2beta1/go_client";
25+
// Artifact lookup endpoints: list the artifacts of a namespace, or fetch a
// single artifact by its ID.
service ArtifactService {
  // Lists all artifacts in the specified namespace.
  // The namespace field is required. In multi-user mode the caller must
  // hold the RBAC verb "list" on the "artifacts" resource for that
  // namespace.
  rpc ListArtifacts(ListArtifactRequest) returns (ListArtifactResponse) {
    option (google.api.http) = {get: "/apis/v2beta1/artifacts"};
  }

  // Retrieves one specific Artifact, identified by its ID.
  rpc GetArtifact(GetArtifactRequest) returns (Artifact) {
    option (google.api.http) = {get: "/apis/v2beta1/artifacts/{artifact_id}"};
  }
}
44+
// Request message for the GetArtifact RPC.
message GetArtifactRequest {
  // Selects whether the returned Artifact carries a download_url.
  enum ArtifactView {
    // No view was specified; treated the same as BASIC.
    ARTIFACT_VIEW_UNSPECIFIED = 0;

    // The server response excludes download_url.
    BASIC = 1;

    // The server response includes download_url.
    DOWNLOAD = 2;
  }

  // Required. ID of the artifact to retrieve.
  string artifact_id = 1;

  // Optional. Defaults to BASIC.
  // With "DOWNLOAD", the response includes a signed URL that carries an
  // expiry (default 15 seconds, unless configured otherwise); that URL can
  // be used to download the Artifact directly from the Artifact's storage
  // provider.
  // With "BASIC", download_url is left out of the server response, which
  // prevents the creation of any signed URL.
  ArtifactView view = 2;
}
69+
// Request message for the ListArtifacts RPC.
// Passed on to MLMD ListOperationOptions:
// https://github.com/kubeflow/pipelines/blob/master/third_party/ml-metadata/ml_metadata/proto/metadata_store.proto#L868
message ListArtifactRequest {
  // The fields that results can be ordered by.
  enum Field {
    FIELD_UNSPECIFIED = 0;
    CREATE_TIME = 1;
    LAST_UPDATE_TIME = 2;
    ID = 3;
  }

  // Optional. [default = 20]
  // Maximum number of resources to return in the result. Zero or a negative
  // value selects the default (20). The API implementation additionally
  // enforces an upper bound of 100 and uses the smaller of that bound and
  // the value given here.
  int32 max_result_size = 1;

  // Optional. The field to order by. [default = ID]
  Field order_by_field = 2;

  // Optional. Ordering direction: either "asc" (ascending) or "dsc"
  // (descending). [default = asc]
  string order_by = 3;

  // Optional. The next_page_token value returned by a previous List
  // request, if any.
  string next_page_token = 4;

  // Required. Namespace of the Artifact's context.
  string namespace = 5;
}
99+
// Response message for the ListArtifacts RPC.
message ListArtifactResponse {
  // The artifacts that were retrieved.
  repeated Artifact artifacts = 1;

  // Token for retrieving the next page of results; empty if there are none.
  string next_page_token = 2;
}
107+
// A single artifact as returned by GetArtifact and ListArtifacts.
message Artifact {
  // Unique ID of the Artifact. Generated by MLMD.
  string artifact_id = 1;

  // Storage provider in which this Artifact is located
  // (e.g. S3, Minio, etc.).
  string storage_provider = 2;

  // Path of this Artifact within the storage provider.
  // For example, an object located at s3://my-bucket/path/a/b/c
  // results in "path/a/b/c".
  string storage_path = 3;

  // The Artifact URI.
  string uri = 4;

  // Optional output. A signed URL that can be used to download this
  // Artifact directly from its store.
  string download_url = 5;

  // Namespace associated with this Artifact, determined by the namespace
  // of the parent PipelineRun that created it.
  string namespace = 6;

  // MLMD type of the artifact (e.g. system.Dataset).
  string artifact_type = 7;

  // Size of the artifact in bytes. Omitted when the artifact does not
  // exist in object store (e.g. Metrics).
  int64 artifact_size = 8;

  // Time at which the artifact was created.
  google.protobuf.Timestamp created_at = 9;

  // Time at which the artifact was last updated.
  google.protobuf.Timestamp last_updated_at = 10;

  // If any error happens while retrieving an artifact field, only the
  // artifact ID and the error message are returned. The client is free to
  // choose how to handle the error. This is especially useful when calling
  // ListArtifacts.
  google.rpc.Status error = 11;
}

0 commit comments

Comments
 (0)