-
Notifications
You must be signed in to change notification settings - Fork 800
[flytepropeller][flyteadmin] Streaming Decks V2 #6053
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 3 commits
54aa165
9ed6b6e
4b4f6bd
dd774cb
0bb8e91
25fea29
4e24e91
8d1d0e4
31853bb
4068043
65b6efe
137579f
04f7fbc
aa56d64
a16851f
7314455
19498f5
74f595f
3bd3336
f6d8493
4b56e52
db4b19e
2737251
564dc5f
69ba94e
c992eae
0b91b5c
1d18265
96500c1
dd9dbaa
f51ff8c
bd5e682
561a43c
a33ba09
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -71,10 +71,43 @@ func getPluginMetricKey(pluginID, taskType string) string { | |||||||||||||||||
| return taskType + "_" + pluginID | ||||||||||||||||||
| } | ||||||||||||||||||
|
|
||||||||||||||||||
| func (p *pluginRequestedTransition) CacheHit(outputPath storage.DataReference, deckPath *storage.DataReference, entry catalog.Entry) { | ||||||||||||||||||
| func (p *pluginRequestedTransition) AddDeckURI(ctx context.Context, tCtx *taskExecutionContext) { | ||||||||||||||||||
| var deckURI *storage.DataReference | ||||||||||||||||||
| deckURIValue := tCtx.ow.GetDeckPath() | ||||||||||||||||||
| deckURI = &deckURIValue | ||||||||||||||||||
|
|
||||||||||||||||||
| if p.execInfo.OutputInfo == nil { | ||||||||||||||||||
| p.execInfo.OutputInfo = &handler.OutputInfo{} | ||||||||||||||||||
| } | ||||||||||||||||||
|
|
||||||||||||||||||
| p.execInfo.OutputInfo.DeckURI = deckURI | ||||||||||||||||||
| } | ||||||||||||||||||
|
|
||||||||||||||||||
| // RemoveNonexistentDeckURI removes the deck URI from the plugin execution info if the URI does not exist in remote storage. | ||||||||||||||||||
| // | ||||||||||||||||||
| // When tCtx.ow.GetReader() returns nil the deck cannot be checked, so the deck URI is cleared (if OutputInfo is set) | ||||||||||||||||||
| // and nil is returned. If the existence check itself fails, the error is logged and returned wrapped, and the | ||||||||||||||||||
| // deck URI is left untouched. | ||||||||||||||||||
| func (p *pluginRequestedTransition) RemoveNonexistentDeckURI(ctx context.Context, tCtx *taskExecutionContext) error { | ||||||||||||||||||
| reader := tCtx.ow.GetReader() | ||||||||||||||||||
| if reader == nil { | ||||||||||||||||||
| // Return here for every nil reader, regardless of OutputInfo, so we never fall through to | ||||||||||||||||||
| // reader.DeckExists and call a method on a nil reader. | ||||||||||||||||||
| if p.execInfo.OutputInfo != nil { | ||||||||||||||||||
| p.execInfo.OutputInfo.DeckURI = nil | ||||||||||||||||||
| } | ||||||||||||||||||
| return nil | ||||||||||||||||||
| } | ||||||||||||||||||
|
|
||||||||||||||||||
| exists, err := reader.DeckExists(ctx) | ||||||||||||||||||
| if err != nil { | ||||||||||||||||||
| logger.Errorf(ctx, "Failed to check deck file existence. Error: %v", err) | ||||||||||||||||||
| return regErrors.Wrapf(err, "failed to check existence of deck file") | ||||||||||||||||||
| } | ||||||||||||||||||
|
|
||||||||||||||||||
| if !exists && p.execInfo.OutputInfo != nil { | ||||||||||||||||||
| p.execInfo.OutputInfo.DeckURI = nil | ||||||||||||||||||
| } | ||||||||||||||||||
|
|
||||||||||||||||||
| return nil | ||||||||||||||||||
| } | ||||||||||||||||||
|
|
||||||||||||||||||
| func (p *pluginRequestedTransition) CacheHit(outputPath storage.DataReference, entry catalog.Entry) { | ||||||||||||||||||
eapolinario marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||||||||||||
| p.ttype = handler.TransitionTypeEphemeral | ||||||||||||||||||
| p.pInfo = pluginCore.PhaseInfoSuccess(nil) | ||||||||||||||||||
| p.ObserveSuccess(outputPath, deckPath, &event.TaskNodeMetadata{CacheStatus: entry.GetStatus().GetCacheStatus(), CatalogKey: entry.GetStatus().GetMetadata()}) | ||||||||||||||||||
| p.ObserveSuccess(outputPath, &event.TaskNodeMetadata{CacheStatus: entry.GetStatus().GetCacheStatus(), CatalogKey: entry.GetStatus().GetMetadata()}) | ||||||||||||||||||
| } | ||||||||||||||||||
|
|
||||||||||||||||||
| func (p *pluginRequestedTransition) PopulateCacheInfo(entry catalog.Entry) { | ||||||||||||||||||
|
|
@@ -144,10 +177,13 @@ func (p *pluginRequestedTransition) FinalTaskEvent(input ToTaskExecutionEventInp | |||||||||||||||||
| return ToTaskExecutionEvent(input) | ||||||||||||||||||
| } | ||||||||||||||||||
|
|
||||||||||||||||||
| func (p *pluginRequestedTransition) ObserveSuccess(outputPath storage.DataReference, deckPath *storage.DataReference, taskMetadata *event.TaskNodeMetadata) { | ||||||||||||||||||
| p.execInfo.OutputInfo = &handler.OutputInfo{ | ||||||||||||||||||
| OutputURI: outputPath, | ||||||||||||||||||
| DeckURI: deckPath, | ||||||||||||||||||
| func (p *pluginRequestedTransition) ObserveSuccess(outputPath storage.DataReference, taskMetadata *event.TaskNodeMetadata) { | ||||||||||||||||||
| if p.execInfo.OutputInfo == nil { | ||||||||||||||||||
| p.execInfo.OutputInfo = &handler.OutputInfo{ | ||||||||||||||||||
| OutputURI: outputPath, | ||||||||||||||||||
| } | ||||||||||||||||||
| } else { | ||||||||||||||||||
| p.execInfo.OutputInfo.OutputURI = outputPath | ||||||||||||||||||
eapolinario marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||||||||||||
| } | ||||||||||||||||||
|
|
||||||||||||||||||
| p.execInfo.TaskNodeInfo = &handler.TaskNodeInfo{ | ||||||||||||||||||
|
|
@@ -171,7 +207,7 @@ func (p *pluginRequestedTransition) FinalTransition(ctx context.Context) (handle | |||||||||||||||||
| } | ||||||||||||||||||
|
|
||||||||||||||||||
| logger.Debugf(ctx, "Task still running") | ||||||||||||||||||
| return handler.DoTransition(p.ttype, handler.PhaseInfoRunning(nil)), nil | ||||||||||||||||||
| return handler.DoTransition(p.ttype, handler.PhaseInfoRunning(&p.execInfo)), nil | ||||||||||||||||||
| } | ||||||||||||||||||
|
|
||||||||||||||||||
| // The plugin interface available especially for testing. | ||||||||||||||||||
|
|
@@ -464,8 +500,19 @@ func (t Handler) invokePlugin(ctx context.Context, p pluginCore.Plugin, tCtx *ta | |||||||||||||||||
| } | ||||||||||||||||||
| } | ||||||||||||||||||
|
|
||||||||||||||||||
| // Regardless of the observed phase, we always add the DeckUri to support real-time deck functionality. | ||||||||||||||||||
| // The deck should be accessible even if the task is still running or has failed. | ||||||||||||||||||
| // It's possible that the deck URI may not exist in remote storage yet or will never exist. | ||||||||||||||||||
| // So, it is console's responsibility to handle the case when the deck URI actually does not exist. | ||||||||||||||||||
| pluginTrns.AddDeckURI(ctx, tCtx) | ||||||||||||||||||
|
|
||||||||||||||||||
| switch pluginTrns.pInfo.Phase() { | ||||||||||||||||||
| case pluginCore.PhaseSuccess: | ||||||||||||||||||
| // This is to prevent the console from potentially checking the deck URI that does not exist if in final phase(PhaseSuccess). | ||||||||||||||||||
| err = pluginTrns.RemoveNonexistentDeckURI(ctx, tCtx) | ||||||||||||||||||
|
||||||||||||||||||
| // DeckExists calls r.store.Head on the deck path and reports the result of Exists() on the returned metadata. | |
| // If the Head call fails, it returns false together with that error. | |
| func (r RemoteFileOutputReader) DeckExists(ctx context.Context) (bool, error) { | |
| deckMetadata, headErr := r.store.Head(ctx, r.outPath.GetDeckPath()) | |
| if headErr != nil { | |
| return false, headErr | |
| } | |
| return deckMetadata.Exists(), nil | |
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
How did you measure the performance degradation?
Did you use Grafana or another performance tool?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
does the flyteadmin merge algorithm then remove the deckURI from the execution metadata?
flyteadmin will set the deckURI in the execution metadata to nil if the propeller removes it.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
cc @hamersaw
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Just to close the loop on this: we're still making this HEAD call only when the node reaches the terminal phase.
Uh oh!
There was an error while loading. Please reload this page.