@@ -401,8 +401,7 @@ spec:
401
401
rules:
402
402
- action: Ignore
403
403
onPodConditions:
404
- operator: In
405
- values: [ DisruptionTarget ]
404
+ - type: DisruptionTarget
406
405
```
407
406
408
407
Note that, in this case the user supplies a list of Pod condition type values.
@@ -752,15 +751,16 @@ type PodFailurePolicyAction string
752
751
const (
753
752
// This is an action which might be taken on a pod failure - mark the
754
753
// pod's job as Failed and terminate all running pods.
755
- PodFailurePolicyActionTerminate PodFailurePolicyAction = "Terminate "
754
+ PodFailurePolicyActionFailJob PodFailurePolicyAction = "FailJob"
756
755
757
756
// This is an action which might be taken on a pod failure - the counter towards
758
- // .backoffLimit is not incremented and a replacement pod is created.
757
+ // .backoffLimit, represented by the job's .status.failed field, is not
758
+ // incremented and a replacement pod is created.
759
759
PodFailurePolicyActionIgnore PodFailurePolicyAction = "Ignore"
760
760
761
761
// This is an action which might be taken on a pod failure - the pod failure
762
- // is handled in the default way - the counter towards .backoffLimit is
763
- // incremented.
762
+ // is handled in the default way - the counter towards .backoffLimit,
763
+ // represented by the job's .status.failed field, is incremented.
764
764
PodFailurePolicyActionCount PodFailurePolicyAction = "Count"
765
765
)
766
766
@@ -772,15 +772,20 @@ const (
772
772
)
773
773
774
774
// PodFailurePolicyOnExitCodesRequirement describes the requirement for handling
775
- // a failed pod based on its container exit codes.
775
+ // a failed pod based on its container exit codes. In particular, it looks up the
776
+ // .state.terminated.exitCode for each app container and init container status,
777
+ // represented by the .status.containerStatuses and .status.initContainerStatuses
778
+ // fields in the Pod status, respectively. Containers completed with success
779
+ // (exit code 0) are excluded from the requirement check.
776
780
type PodFailurePolicyOnExitCodesRequirement struct {
777
781
// Restricts the check for exit codes to the container with the
778
782
// specified name. When null, the rule applies to all containers.
779
783
// +optional
780
784
ContainerName *string
781
785
782
786
// Represents the relationship between the container exit code(s) and the
783
- // specified values. Possible values are:
787
+ // specified values. Containers completed with success (exit code 0) are
788
+ // excluded from the requirement check. Possible values are:
784
789
// - In: the requirement is satisfied if at least one container exit code
785
790
// (might be multiple if there are multiple containers not restricted
786
791
// by the 'containerName' field) is in the set of specified values.
@@ -791,39 +796,26 @@ type PodFailurePolicyOnExitCodesRequirement struct {
791
796
792
797
// Specifies the set of values. Each returned container exit code (might be
793
798
// multiple in case of multiple containers) is checked against this set of
794
- // values with respect to the operator.
799
+ // values with respect to the operator. Value '0' cannot be used for the In
800
+ // operator.
795
801
// +listType=set
796
802
Values []int32
797
803
}
798
804
799
- type PodFailurePolicyOnPodConditionsOperator string
800
-
801
- const (
802
- PodFailurePolicyOnPodConditionsOpIn PodFailurePolicyOnPodConditionsOperator = "In"
803
- )
804
-
805
- // PodFailurePolicyOnPodConditionsRequirement describes the requirement for handling
806
- // a failed pod based on its conditions.
807
- type PodFailurePolicyOnPodConditionsRequirement struct {
808
- // Represents the relationship between the set of actual Pod condition types
809
- // and the set of specified Pod condition types. Possible values are:
810
- // - In: the requirement is satisfied, if at least one actual Pod condition
811
- // type (for a condition with status=True) is present in the set of
812
- // specified Pod condition types.
813
- Operator PodFailurePolicyOnPodConditionsOperator
814
-
815
- // Specifies the set of values. Each actual pod condition type,
816
- // with status=True, is checked against this set with respect to the operator.
817
- // +listType=set
818
- Values []api.PodConditionType
805
+ // PodFailurePolicyOnPodConditionsPattern describes a pattern for matching
806
+ // an actual pod condition type.
807
+ type PodFailurePolicyOnPodConditionsPattern struct {
808
+ // Specifies the required Pod condition type. The pattern matches a pod condition
809
+ // if the specified type equals the pod condition type.
810
+ Type api.PodConditionType
819
811
}
820
812
821
813
// PodFailurePolicyRule describes how a pod failure is handled when the requirements are met.
822
814
// Only one of onExitCodes and onPodConditions can be used in each rule.
823
815
type PodFailurePolicyRule struct {
824
816
// Specifies the action taken on a pod failure when the requirements are satisfied.
825
817
// Possible values are:
826
- // - Terminate : indicates that the pod's job is marked as Failed and all
818
+ // - FailJob: indicates that the pod's job is marked as Failed and all
827
819
// running pods are terminated.
828
820
// - Ignore: indicates that the counter towards the .backoffLimit is not
829
821
// incremented and a replacement pod is created.
@@ -835,9 +827,11 @@ type PodFailurePolicyRule struct {
835
827
// +optional
836
828
OnExitCodes *PodFailurePolicyOnExitCodesRequirement
837
829
838
- // Represents the requirement on the pod conditions.
839
- // +optional
840
- OnPodConditions *PodFailurePolicyOnPodConditionsRequirement
830
+ // Represents the requirement on the pod conditions. The requirement is represented
831
+ // as a list of pod condition patterns. The requirement is satisfied if at
832
+ // least one pattern matches an actual pod condition.
833
+ // +listType=atomic
834
+ OnPodConditions []PodFailurePolicyOnPodConditionsPattern
841
835
}
842
836
843
837
// PodFailurePolicy describes how failed pods influence the backoffLimit.
@@ -857,8 +851,13 @@ type JobSpec struct {
857
851
// Specifies the policy of handling failed pods. In particular, it allows to
858
852
// specify the set of actions and conditions which need to be
859
853
// satisfied to take the associated action.
860
- // If empty, the default behaviour applies - the counter of pod failed is
861
- // incremented and it is checked against the backoffLimit.
854
+ // If empty, the default behaviour applies - the counter of failed pods,
855
+ // represented by the job's .status.failed field, is incremented and it is
856
+ // checked against the backoffLimit. This field cannot be used in combination
857
+ // with restartPolicy=OnFailure.
858
+ //
859
+ // This field is alpha-level. To use this field, you must enable the
860
+ // `JobPodFailurePolicy` feature gate (disabled by default).
862
861
// +optional
863
862
PodFailurePolicy *PodFailurePolicy
864
863
...
@@ -899,8 +898,7 @@ spec:
899
898
values : [1,2,3]
900
899
- action : Ignore
901
900
onPodConditions :
902
- operator: In
903
- values: [ DisruptionTarget ]
901
+ - type : DisruptionTarget
904
902
```
905
903
906
904
### Evaluation
@@ -1086,6 +1084,9 @@ Below are some examples to consider, in addition to the aforementioned [maturity
1086
1084
indicating that a pod should be retried (see : [Evolving condition types](#evolving-condition-types))
1087
1085
- Simplify the code in job controller responsible for detection of failed pods
1088
1086
based on the fix for pods stuck in the running phase (see: [Marking pods as Failed](#marking-pods-as-failed))
1087
+ - Consolidate the code for appending pod conditions across components
1088
+ - Do not update the pod disruption condition (with type=`DisruptionTarget`) if
1089
+ it is already present with `status=True`
1089
1090
- Review and implement if feasible adding of pod conditions with the use of
1090
1091
[SSA](https://kubernetes.io/docs/reference/using-api/server-side-apply/) client.
1091
1092
- The feature flag enabled by default
0 commit comments