-
Notifications
You must be signed in to change notification settings - Fork 675
fix: Extend add_tensor_model so that ModelDeploymentCard can be correctly picked up #4169
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 3 commits
cbbf7f2
f3f6bc3
a572af9
afd4c43
dcc356a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -3,9 +3,12 @@ | |
|
|
||
| use std::sync::Arc; | ||
|
|
||
| use dynamo_llm::{self as llm_rs}; | ||
| use llm_rs::model_card::ModelDeploymentCard as RsModelDeploymentCard; | ||
| use llm_rs::model_type::{ModelInput, ModelType}; | ||
| use pyo3::prelude::*; | ||
|
|
||
| use crate::{CancellationToken, engine::*, to_pyerr}; | ||
| use crate::{CancellationToken, engine::*, llm::local_model::ModelRuntimeConfig, to_pyerr}; | ||
|
|
||
| pub use dynamo_llm::grpc::service::kserve; | ||
|
|
||
|
|
@@ -56,12 +59,28 @@ impl KserveGrpcService { | |
| .map_err(to_pyerr) | ||
| } | ||
|
|
||
| #[pyo3(signature = (model, checksum, engine, runtime_config=None))] | ||
| pub fn add_tensor_model( | ||
| &self, | ||
| model: String, | ||
| checksum: String, | ||
| engine: PythonAsyncEngine, | ||
| runtime_config: Option<ModelRuntimeConfig>, | ||
| ) -> PyResult<()> { | ||
| // If runtime_config is provided, create and save a ModelDeploymentCard | ||
| // so the ModelConfig endpoint can return model configuration | ||
| if let Some(runtime_config) = runtime_config { | ||
| let mut card = RsModelDeploymentCard::with_name_only(&model); | ||
| card.model_type = ModelType::TensorBased; | ||
| card.model_input = ModelInput::Tensor; | ||
| card.runtime_config = runtime_config.inner; | ||
|
|
||
| self.inner | ||
| .model_manager() | ||
| .save_model_card(&model, card) | ||
| .map_err(to_pyerr)?; | ||
| } | ||
|
|
||
| let engine = Arc::new(engine); | ||
| self.inner | ||
| .model_manager() | ||
|
|
@@ -84,10 +103,17 @@ impl KserveGrpcService { | |
| } | ||
|
|
||
| pub fn remove_tensor_model(&self, model: String) -> PyResult<()> { | ||
| // Remove the engine | ||
| self.inner | ||
| .model_manager() | ||
| .remove_tensor_model(&model) | ||
| .map_err(to_pyerr) | ||
| .map_err(to_pyerr)?; | ||
|
|
||
| // Also remove the model card if it exists | ||
| // (It's ok if it doesn't exist since runtime_config is optional, we just ignore the None return) | ||
| let _ = self.inner.model_manager().remove_model_card(&model); | ||
|
Comment on lines
+112
to
+114
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In dynamo, when a non-tensor model is unloaded, the MDC persists in etcd. From my understanding, with these changes, tensor model MDCs are explicitly removed from etcd. Does this create inconsistent cleanup behavior between model types?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
@grahamking changed this behavior in the past month or two, now MDCs are associated with model instances and should get cleaned up when the instance goes away. |
||
|
|
||
| Ok(()) | ||
| } | ||
|
|
||
| pub fn list_chat_completions_models(&self) -> PyResult<Vec<String>> { | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you add or append to an existing test so that we can verify the ModelConfig endpoint works when runtime_config is provided?