Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
98 changes: 98 additions & 0 deletions api/openapi-spec/swagger.json
Original file line number Diff line number Diff line change
Expand Up @@ -14036,6 +14036,69 @@
}
}
},
"trainer.v1alpha1.ProgressionStatus": {
"description": "ProgressionStatus represents the training progression status read from rank 0 node.",
"type": "object",
"properties": {
"currentEpoch": {
"description": "CurrentEpoch is the current training epoch.",
"type": "integer",
"format": "int64"
},
"currentStep": {
"description": "CurrentStep is the current training step/iteration.",
"type": "integer",
"format": "int64"
},
"estimatedTimeRemaining": {
"description": "EstimatedTimeRemaining is the estimated time remaining in seconds.",
"type": "integer",
"format": "int64"
},
"lastUpdateTime": {
"description": "LastUpdateTime is the timestamp when the progression was last updated.",
"allOf": [
{
"$ref": "#/components/schemas/io.k8s.apimachinery.pkg.apis.meta.v1.Time"
}
]
},
"message": {
"description": "Message provides additional information about the training progression.",
"type": "string"
},
"metrics": {
"description": "Metrics contains additional training metrics as key-value pairs.",
"type": "object",
"additionalProperties": {
"type": "string",
"default": ""
}
},
"percentageComplete": {
"description": "PercentageComplete represents the completion percentage (0-100) as a string.",
"type": "string"
},
"totalEpochs": {
"description": "TotalEpochs is the total number of training epochs.",
"type": "integer",
"format": "int64"
},
"totalSteps": {
"description": "TotalSteps is the total number of training steps/iterations.",
"type": "integer",
"format": "int64"
},
"trainingMetrics": {
"description": "TrainingMetrics contains structured training metrics.",
"allOf": [
{
"$ref": "#/components/schemas/trainer.v1alpha1.TrainingMetrics"
}
]
}
}
},
"trainer.v1alpha1.RuntimeRef": {
"description": "RuntimeRef represents the reference to the existing training runtime.",
"type": "object",
Expand Down Expand Up @@ -14301,6 +14364,14 @@
"name"
],
"x-kubernetes-list-type": "map"
},
"progressionStatus": {
"description": "ProgressionStatus tracks the training progression from rank 0 node.",
"allOf": [
{
"$ref": "#/components/schemas/trainer.v1alpha1.ProgressionStatus"
}
]
}
}
},
Expand Down Expand Up @@ -14369,6 +14440,33 @@
}
}
},
"trainer.v1alpha1.TrainingMetrics": {
"description": "TrainingMetrics represents structured training metrics.",
"type": "object",
"properties": {
"accuracy": {
"description": "Accuracy represents the current model accuracy.",
"type": "string"
},
"checkpointsStored": {
"description": "CheckpointsStored represents the number of checkpoints stored.",
"type": "integer",
"format": "int64"
},
"latestCheckpointPath": {
"description": "LatestCheckpointPath represents the path to the latest checkpoint file.",
"type": "string"
},
"learningRate": {
"description": "LearningRate represents the current learning rate.",
"type": "string"
},
"loss": {
"description": "Loss represents the current training loss.",
"type": "string"
}
}
},
"trainer.v1alpha1.TrainingRuntime": {
"description": "TrainingRuntime represents a training runtime which can be referenced as part of `runtimeRef` API in TrainJob. This resource is namespace-scoped and can be referenced by a TrainJob that is created in the *same* namespace as the TrainingRuntime.",
"type": "object",
Expand Down
2 changes: 2 additions & 0 deletions api/python_api/kubeflow_trainer_api/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -368,6 +368,7 @@
from kubeflow_trainer_api.models.trainer_v1alpha1_pod_group_policy_source import TrainerV1alpha1PodGroupPolicySource
from kubeflow_trainer_api.models.trainer_v1alpha1_pod_spec_override import TrainerV1alpha1PodSpecOverride
from kubeflow_trainer_api.models.trainer_v1alpha1_pod_spec_override_target_job import TrainerV1alpha1PodSpecOverrideTargetJob
from kubeflow_trainer_api.models.trainer_v1alpha1_progression_status import TrainerV1alpha1ProgressionStatus
from kubeflow_trainer_api.models.trainer_v1alpha1_runtime_ref import TrainerV1alpha1RuntimeRef
from kubeflow_trainer_api.models.trainer_v1alpha1_torch_elastic_policy import TrainerV1alpha1TorchElasticPolicy
from kubeflow_trainer_api.models.trainer_v1alpha1_torch_ml_policy_source import TrainerV1alpha1TorchMLPolicySource
Expand All @@ -376,6 +377,7 @@
from kubeflow_trainer_api.models.trainer_v1alpha1_train_job_spec import TrainerV1alpha1TrainJobSpec
from kubeflow_trainer_api.models.trainer_v1alpha1_train_job_status import TrainerV1alpha1TrainJobStatus
from kubeflow_trainer_api.models.trainer_v1alpha1_trainer import TrainerV1alpha1Trainer
from kubeflow_trainer_api.models.trainer_v1alpha1_training_metrics import TrainerV1alpha1TrainingMetrics
from kubeflow_trainer_api.models.trainer_v1alpha1_training_runtime import TrainerV1alpha1TrainingRuntime
from kubeflow_trainer_api.models.trainer_v1alpha1_training_runtime_list import TrainerV1alpha1TrainingRuntimeList
from kubeflow_trainer_api.models.trainer_v1alpha1_training_runtime_spec import TrainerV1alpha1TrainingRuntimeSpec
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
# coding: utf-8

"""
Kubeflow Trainer OpenAPI Spec

No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator)

The version of the OpenAPI document: unversioned
Generated by OpenAPI Generator (https://openapi-generator.tech)

Do not edit the class manually.
""" # noqa: E501


from __future__ import annotations
import pprint
import re # noqa: F401
import json

from datetime import datetime
from pydantic import BaseModel, ConfigDict, Field, StrictInt, StrictStr
from typing import Any, ClassVar, Dict, List, Optional
from kubeflow_trainer_api.models.trainer_v1alpha1_training_metrics import TrainerV1alpha1TrainingMetrics
from typing import Optional, Set
from typing_extensions import Self

class TrainerV1alpha1ProgressionStatus(BaseModel):
    """
    ProgressionStatus represents the training progression status read from rank 0 node.
    """ # noqa: E501
    current_epoch: Optional[StrictInt] = Field(
        default=None,
        description="CurrentEpoch is the current training epoch.",
        alias="currentEpoch",
    )
    current_step: Optional[StrictInt] = Field(
        default=None,
        description="CurrentStep is the current training step/iteration.",
        alias="currentStep",
    )
    estimated_time_remaining: Optional[StrictInt] = Field(
        default=None,
        description="EstimatedTimeRemaining is the estimated time remaining in seconds.",
        alias="estimatedTimeRemaining",
    )
    last_update_time: Optional[datetime] = Field(
        default=None,
        description="LastUpdateTime is the timestamp when the progression was last updated.",
        alias="lastUpdateTime",
    )
    message: Optional[StrictStr] = Field(
        default=None,
        description="Message provides additional information about the training progression.",
    )
    metrics: Optional[Dict[str, StrictStr]] = Field(
        default=None,
        description="Metrics contains additional training metrics as key-value pairs.",
    )
    percentage_complete: Optional[StrictStr] = Field(
        default=None,
        description="PercentageComplete represents the completion percentage (0-100) as a string.",
        alias="percentageComplete",
    )
    total_epochs: Optional[StrictInt] = Field(
        default=None,
        description="TotalEpochs is the total number of training epochs.",
        alias="totalEpochs",
    )
    total_steps: Optional[StrictInt] = Field(
        default=None,
        description="TotalSteps is the total number of training steps/iterations.",
        alias="totalSteps",
    )
    training_metrics: Optional[TrainerV1alpha1TrainingMetrics] = Field(
        default=None,
        description="TrainingMetrics contains structured training metrics.",
        alias="trainingMetrics",
    )
    # Wire-format (aliased) names of every field declared above.
    __properties: ClassVar[List[str]] = [
        "currentEpoch",
        "currentStep",
        "estimatedTimeRemaining",
        "lastUpdateTime",
        "message",
        "metrics",
        "percentageComplete",
        "totalEpochs",
        "totalSteps",
        "trainingMetrics",
    ]

    model_config = ConfigDict(
        populate_by_name=True,
        validate_assignment=True,
        protected_namespaces=(),
    )

    def to_str(self) -> str:
        """Pretty-print the model, keyed by field alias."""
        aliased_dump = self.model_dump(by_alias=True)
        return pprint.pformat(aliased_dump)

    def to_json(self) -> str:
        """Serialize the result of `to_dict()` to a JSON string."""
        # TODO: pydantic v2: use .model_dump_json(by_alias=True, exclude_unset=True) instead
        serializable = self.to_dict()
        return json.dumps(serializable)

    @classmethod
    def from_json(cls, json_str: str) -> Optional[Self]:
        """Parse `json_str` and build a TrainerV1alpha1ProgressionStatus via `from_dict()`."""
        parsed = json.loads(json_str)
        return cls.from_dict(parsed)

    def to_dict(self) -> Dict[str, Any]:
        """Dump the model to a plain dict keyed by field alias.

        Compared with a bare `self.model_dump(by_alias=True)`:

        * fields whose value is `None` are left out of the result;
        * the nested `training_metrics` model, when present, is rendered
          through its own `to_dict()`.
        """
        # No fields are excluded for this model.
        skipped: Set[str] = set()

        payload = self.model_dump(
            by_alias=True,
            exclude=skipped,
            exclude_none=True,
        )
        # Replace pydantic's default rendering of the nested model with its to_dict() output.
        nested_metrics = self.training_metrics
        if nested_metrics:
            payload['trainingMetrics'] = nested_metrics.to_dict()
        return payload

    @classmethod
    def from_dict(cls, obj: Optional[Dict[str, Any]]) -> Optional[Self]:
        """Build a TrainerV1alpha1ProgressionStatus from an alias-keyed dict.

        `None` yields `None`. Anything that is not a dict is passed straight
        to `model_validate`. For a dict, only the known aliased keys are read;
        any other keys are not looked at.
        """
        if obj is None:
            return None

        if not isinstance(obj, dict):
            return cls.model_validate(obj)

        # Keys copied through as-is; a missing key becomes an explicit None.
        scalar_keys = (
            "currentEpoch",
            "currentStep",
            "estimatedTimeRemaining",
            "lastUpdateTime",
            "message",
            "metrics",
            "percentageComplete",
            "totalEpochs",
            "totalSteps",
        )
        values: Dict[str, Any] = {key: obj.get(key) for key in scalar_keys}

        # The nested metrics object goes through its own from_dict() when present.
        raw_metrics = obj.get("trainingMetrics")
        if raw_metrics is None:
            values["trainingMetrics"] = None
        else:
            values["trainingMetrics"] = TrainerV1alpha1TrainingMetrics.from_dict(raw_metrics)

        return cls.model_validate(values)


Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from typing import Any, ClassVar, Dict, List, Optional
from kubeflow_trainer_api.models.io_k8s_apimachinery_pkg_apis_meta_v1_condition import IoK8sApimachineryPkgApisMetaV1Condition
from kubeflow_trainer_api.models.trainer_v1alpha1_job_status import TrainerV1alpha1JobStatus
from kubeflow_trainer_api.models.trainer_v1alpha1_progression_status import TrainerV1alpha1ProgressionStatus
from typing import Optional, Set
from typing_extensions import Self

Expand All @@ -30,7 +31,8 @@ class TrainerV1alpha1TrainJobStatus(BaseModel):
""" # noqa: E501
conditions: Optional[List[IoK8sApimachineryPkgApisMetaV1Condition]] = Field(default=None, description="Conditions for the TrainJob.")
jobs_status: Optional[List[TrainerV1alpha1JobStatus]] = Field(default=None, description="JobsStatus tracks the child Jobs in TrainJob.", alias="jobsStatus")
__properties: ClassVar[List[str]] = ["conditions", "jobsStatus"]
progression_status: Optional[TrainerV1alpha1ProgressionStatus] = Field(default=None, description="ProgressionStatus tracks the training progression from rank 0 node.", alias="progressionStatus")
__properties: ClassVar[List[str]] = ["conditions", "jobsStatus", "progressionStatus"]

model_config = ConfigDict(
populate_by_name=True,
Expand Down Expand Up @@ -85,6 +87,9 @@ def to_dict(self) -> Dict[str, Any]:
if _item_jobs_status:
_items.append(_item_jobs_status.to_dict())
_dict['jobsStatus'] = _items
# override the default output from pydantic by calling `to_dict()` of progression_status
if self.progression_status:
_dict['progressionStatus'] = self.progression_status.to_dict()
return _dict

@classmethod
Expand All @@ -98,7 +103,8 @@ def from_dict(cls, obj: Optional[Dict[str, Any]]) -> Optional[Self]:

_obj = cls.model_validate({
"conditions": [IoK8sApimachineryPkgApisMetaV1Condition.from_dict(_item) for _item in obj["conditions"]] if obj.get("conditions") is not None else None,
"jobsStatus": [TrainerV1alpha1JobStatus.from_dict(_item) for _item in obj["jobsStatus"]] if obj.get("jobsStatus") is not None else None
"jobsStatus": [TrainerV1alpha1JobStatus.from_dict(_item) for _item in obj["jobsStatus"]] if obj.get("jobsStatus") is not None else None,
"progressionStatus": TrainerV1alpha1ProgressionStatus.from_dict(obj["progressionStatus"]) if obj.get("progressionStatus") is not None else None
})
return _obj

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
# coding: utf-8

"""
Kubeflow Trainer OpenAPI Spec

No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator)

The version of the OpenAPI document: unversioned
Generated by OpenAPI Generator (https://openapi-generator.tech)

Do not edit the class manually.
""" # noqa: E501


from __future__ import annotations
import pprint
import re # noqa: F401
import json

from pydantic import BaseModel, ConfigDict, Field, StrictInt, StrictStr
from typing import Any, ClassVar, Dict, List, Optional
from typing import Optional, Set
from typing_extensions import Self

class TrainerV1alpha1TrainingMetrics(BaseModel):
    """
    TrainingMetrics represents structured training metrics.
    """ # noqa: E501
    accuracy: Optional[StrictStr] = Field(
        default=None,
        description="Accuracy represents the current model accuracy.",
    )
    checkpoints_stored: Optional[StrictInt] = Field(
        default=None,
        description="CheckpointsStored represents the number of checkpoints stored.",
        alias="checkpointsStored",
    )
    latest_checkpoint_path: Optional[StrictStr] = Field(
        default=None,
        description="LatestCheckpointPath represents the path to the latest checkpoint file.",
        alias="latestCheckpointPath",
    )
    learning_rate: Optional[StrictStr] = Field(
        default=None,
        description="LearningRate represents the current learning rate.",
        alias="learningRate",
    )
    loss: Optional[StrictStr] = Field(
        default=None,
        description="Loss represents the current training loss.",
    )
    # Wire-format (aliased) names of every field declared above.
    __properties: ClassVar[List[str]] = [
        "accuracy",
        "checkpointsStored",
        "latestCheckpointPath",
        "learningRate",
        "loss",
    ]

    model_config = ConfigDict(
        populate_by_name=True,
        validate_assignment=True,
        protected_namespaces=(),
    )

    def to_str(self) -> str:
        """Pretty-print the model, keyed by field alias."""
        aliased_dump = self.model_dump(by_alias=True)
        return pprint.pformat(aliased_dump)

    def to_json(self) -> str:
        """Serialize the result of `to_dict()` to a JSON string."""
        # TODO: pydantic v2: use .model_dump_json(by_alias=True, exclude_unset=True) instead
        serializable = self.to_dict()
        return json.dumps(serializable)

    @classmethod
    def from_json(cls, json_str: str) -> Optional[Self]:
        """Parse `json_str` and build a TrainerV1alpha1TrainingMetrics via `from_dict()`."""
        parsed = json.loads(json_str)
        return cls.from_dict(parsed)

    def to_dict(self) -> Dict[str, Any]:
        """Dump the model to a plain dict keyed by field alias.

        Unlike a bare `self.model_dump(by_alias=True)`, fields whose value
        is `None` are left out of the result.
        """
        # No fields are excluded for this model.
        skipped: Set[str] = set()

        return self.model_dump(
            by_alias=True,
            exclude=skipped,
            exclude_none=True,
        )

    @classmethod
    def from_dict(cls, obj: Optional[Dict[str, Any]]) -> Optional[Self]:
        """Build a TrainerV1alpha1TrainingMetrics from an alias-keyed dict.

        `None` yields `None`. Anything that is not a dict is passed straight
        to `model_validate`. For a dict, only the known aliased keys are read;
        any other keys are not looked at.
        """
        if obj is None:
            return None

        if not isinstance(obj, dict):
            return cls.model_validate(obj)

        # Keys copied through as-is; a missing key becomes an explicit None.
        known_keys = (
            "accuracy",
            "checkpointsStored",
            "latestCheckpointPath",
            "learningRate",
            "loss",
        )
        values: Dict[str, Any] = {key: obj.get(key) for key in known_keys}
        return cls.model_validate(values)


Loading