diff --git a/src/main/java/com/epam/aidial/deployment/manager/dao/entity/deployment/DeploymentEntity.java b/src/main/java/com/epam/aidial/deployment/manager/dao/entity/deployment/DeploymentEntity.java index dd1c9763..5bd004a2 100644 --- a/src/main/java/com/epam/aidial/deployment/manager/dao/entity/deployment/DeploymentEntity.java +++ b/src/main/java/com/epam/aidial/deployment/manager/dao/entity/deployment/DeploymentEntity.java @@ -62,14 +62,8 @@ public class DeploymentEntity { @JdbcTypeCode(SqlTypes.JSON) private PersistenceDeploymentMetadata metadata; - @Column(name = "initial_scale") - private Integer initialScale; - - @Column(name = "min_scale") - private Integer minScale; - - @Column(name = "max_scale") - private Integer maxScale; + @JdbcTypeCode(SqlTypes.JSON) + private PersistenceScaling scaling; @JdbcTypeCode(SqlTypes.JSON) private PersistenceResources resources; diff --git a/src/main/java/com/epam/aidial/deployment/manager/dao/entity/deployment/InferenceDeploymentEntity.java b/src/main/java/com/epam/aidial/deployment/manager/dao/entity/deployment/InferenceDeploymentEntity.java index c97a157c..0eee97c6 100644 --- a/src/main/java/com/epam/aidial/deployment/manager/dao/entity/deployment/InferenceDeploymentEntity.java +++ b/src/main/java/com/epam/aidial/deployment/manager/dao/entity/deployment/InferenceDeploymentEntity.java @@ -36,7 +36,4 @@ public class InferenceDeploymentEntity extends DeploymentEntity { @JdbcTypeCode(SqlTypes.JSON) private List args; - - @JdbcTypeCode(SqlTypes.JSON) - private PersistenceScaling scaling; } diff --git a/src/main/java/com/epam/aidial/deployment/manager/dao/mapper/PersistenceDeploymentMapper.java b/src/main/java/com/epam/aidial/deployment/manager/dao/mapper/PersistenceDeploymentMapper.java index 20e8fb8b..a1b59b15 100644 --- a/src/main/java/com/epam/aidial/deployment/manager/dao/mapper/PersistenceDeploymentMapper.java +++ b/src/main/java/com/epam/aidial/deployment/manager/dao/mapper/PersistenceDeploymentMapper.java @@ -118,15 +118,13 @@ public void updateEntityFromDomain(Deployment domain, DeploymentEntity existingE existingEntity.setUrl(updatedEntity.getUrl()); existingEntity.setStatus(updatedEntity.getStatus()); existingEntity.setContainerPort(updatedEntity.getContainerPort()); - existingEntity.setInitialScale(updatedEntity.getInitialScale()); - existingEntity.setMaxScale(updatedEntity.getMaxScale()); - existingEntity.setMinScale(updatedEntity.getMinScale()); existingEntity.setEnvs(updatedEntity.getEnvs()); existingEntity.setMetadata(updatedEntity.getMetadata()); existingEntity.setResources(updatedEntity.getResources()); existingEntity.setProbeProperties(updatedEntity.getProbeProperties()); existingEntity.setAuthor(updatedEntity.getAuthor()); existingEntity.setAllowedDomains(updatedEntity.getAllowedDomains()); + existingEntity.setScaling(updatedEntity.getScaling()); if (existingEntity instanceof McpDeploymentEntity existingMcp && updatedEntity instanceof McpDeploymentEntity updatedMcp) { @@ -146,7 +144,6 @@ public void updateEntityFromDomain(Deployment domain, DeploymentEntity existingE existingInference.setSource(updatedInference.getSource()); existingInference.setCommand(updatedInference.getCommand()); existingInference.setArgs(updatedInference.getArgs()); - existingInference.setScaling(updatedInference.getScaling()); } } diff --git a/src/main/java/com/epam/aidial/deployment/manager/model/deployment/AdapterDeployment.java b/src/main/java/com/epam/aidial/deployment/manager/model/deployment/AdapterDeployment.java index 7a6e2f71..5f313787 100644 --- a/src/main/java/com/epam/aidial/deployment/manager/model/deployment/AdapterDeployment.java +++ b/src/main/java/com/epam/aidial/deployment/manager/model/deployment/AdapterDeployment.java @@ -1,9 +1,9 @@ package com.epam.aidial.deployment.manager.model.deployment; -import lombok.AllArgsConstructor; +import lombok.NoArgsConstructor; import lombok.experimental.SuperBuilder; @SuperBuilder -@AllArgsConstructor +@NoArgsConstructor public class AdapterDeployment extends Deployment { } diff --git a/src/main/java/com/epam/aidial/deployment/manager/model/deployment/CreateAdapterDeployment.java b/src/main/java/com/epam/aidial/deployment/manager/model/deployment/CreateAdapterDeployment.java index 75a3fabc..b7d65b6f 100644 --- a/src/main/java/com/epam/aidial/deployment/manager/model/deployment/CreateAdapterDeployment.java +++ b/src/main/java/com/epam/aidial/deployment/manager/model/deployment/CreateAdapterDeployment.java @@ -1,7 +1,9 @@ package com.epam.aidial.deployment.manager.model.deployment; +import lombok.NoArgsConstructor; import lombok.experimental.SuperBuilder; @SuperBuilder +@NoArgsConstructor public class CreateAdapterDeployment extends CreateDeployment { } diff --git a/src/main/java/com/epam/aidial/deployment/manager/model/deployment/CreateDeployment.java b/src/main/java/com/epam/aidial/deployment/manager/model/deployment/CreateDeployment.java index 4268677e..b4f787ee 100644 --- a/src/main/java/com/epam/aidial/deployment/manager/model/deployment/CreateDeployment.java +++ b/src/main/java/com/epam/aidial/deployment/manager/model/deployment/CreateDeployment.java @@ -3,6 +3,7 @@ import com.epam.aidial.deployment.manager.model.DeploymentMetadata; import com.epam.aidial.deployment.manager.model.ImageType; import com.epam.aidial.deployment.manager.model.Resources; +import com.epam.aidial.deployment.manager.model.Scaling; import com.epam.aidial.deployment.manager.model.probe.ProbeProperties; import lombok.AllArgsConstructor; import lombok.Data; @@ -25,9 +26,7 @@ public abstract class CreateDeployment { private String displayName; private String description; private DeploymentMetadata metadata; - private Integer initialScale; - private Integer minScale; - private Integer maxScale; + private Scaling scaling; private Resources resources; private ProbeProperties probeProperties; private Integer containerPort; diff --git a/src/main/java/com/epam/aidial/deployment/manager/model/deployment/CreateInferenceDeployment.java b/src/main/java/com/epam/aidial/deployment/manager/model/deployment/CreateInferenceDeployment.java index a0828c24..131bc473 100644 --- a/src/main/java/com/epam/aidial/deployment/manager/model/deployment/CreateInferenceDeployment.java +++ b/src/main/java/com/epam/aidial/deployment/manager/model/deployment/CreateInferenceDeployment.java @@ -1,6 +1,5 @@ package com.epam.aidial.deployment.manager.model.deployment; -import com.epam.aidial.deployment.manager.model.Scaling; import lombok.AllArgsConstructor; import lombok.EqualsAndHashCode; import lombok.Getter; @@ -26,6 +25,4 @@ public class CreateInferenceDeployment extends CreateDeployment { private List command; @Nullable private List args; - @Nullable - private Scaling scaling; } diff --git a/src/main/java/com/epam/aidial/deployment/manager/model/deployment/CreateInterceptorDeployment.java b/src/main/java/com/epam/aidial/deployment/manager/model/deployment/CreateInterceptorDeployment.java index fa738333..a6ea45ac 100644 --- a/src/main/java/com/epam/aidial/deployment/manager/model/deployment/CreateInterceptorDeployment.java +++ b/src/main/java/com/epam/aidial/deployment/manager/model/deployment/CreateInterceptorDeployment.java @@ -1,7 +1,9 @@ package com.epam.aidial.deployment.manager.model.deployment; +import lombok.NoArgsConstructor; import lombok.experimental.SuperBuilder; @SuperBuilder +@NoArgsConstructor public class CreateInterceptorDeployment extends CreateDeployment { } diff --git a/src/main/java/com/epam/aidial/deployment/manager/model/deployment/Deployment.java b/src/main/java/com/epam/aidial/deployment/manager/model/deployment/Deployment.java index 2de8d28f..472c43bb 100644 --- a/src/main/java/com/epam/aidial/deployment/manager/model/deployment/Deployment.java +++ b/src/main/java/com/epam/aidial/deployment/manager/model/deployment/Deployment.java @@ -5,6 +5,7 @@ import com.epam.aidial.deployment.manager.model.EnvVar; import com.epam.aidial.deployment.manager.model.ImageType; import com.epam.aidial.deployment.manager.model.Resources; +import com.epam.aidial.deployment.manager.model.Scaling; import com.epam.aidial.deployment.manager.model.probe.ProbeProperties; import com.fasterxml.jackson.annotation.JsonSubTypes; import com.fasterxml.jackson.annotation.JsonTypeInfo; @@ -39,9 +40,7 @@ public abstract class Deployment { private String description; private List envs; private DeploymentMetadata metadata; - private Integer initialScale; - private Integer minScale; - private Integer maxScale; + private Scaling scaling; private Resources resources; private ProbeProperties probeProperties; private DeploymentStatus status; diff --git a/src/main/java/com/epam/aidial/deployment/manager/model/deployment/InferenceDeployment.java b/src/main/java/com/epam/aidial/deployment/manager/model/deployment/InferenceDeployment.java index 860db88f..009cb390 100644 --- a/src/main/java/com/epam/aidial/deployment/manager/model/deployment/InferenceDeployment.java +++ b/src/main/java/com/epam/aidial/deployment/manager/model/deployment/InferenceDeployment.java @@ -1,6 +1,5 @@ package com.epam.aidial.deployment.manager.model.deployment; -import com.epam.aidial.deployment.manager.model.Scaling; import lombok.AllArgsConstructor; import lombok.EqualsAndHashCode; import lombok.Getter; @@ -26,6 +25,4 @@ public class InferenceDeployment extends Deployment { private List command; @Nullable private List args; - @Nullable - private Scaling scaling; } diff --git a/src/main/java/com/epam/aidial/deployment/manager/model/deployment/InterceptorDeployment.java b/src/main/java/com/epam/aidial/deployment/manager/model/deployment/InterceptorDeployment.java index e52d21c6..cc870c92 100644 --- a/src/main/java/com/epam/aidial/deployment/manager/model/deployment/InterceptorDeployment.java +++ b/src/main/java/com/epam/aidial/deployment/manager/model/deployment/InterceptorDeployment.java @@ -1,9 +1,9 @@ package com.epam.aidial.deployment.manager.model.deployment; -import lombok.AllArgsConstructor; +import lombok.NoArgsConstructor; import lombok.experimental.SuperBuilder; @SuperBuilder -@AllArgsConstructor +@NoArgsConstructor public class InterceptorDeployment extends Deployment { } diff --git a/src/main/java/com/epam/aidial/deployment/manager/service/deployment/DeploymentService.java b/src/main/java/com/epam/aidial/deployment/manager/service/deployment/DeploymentService.java index 3aa5eb6c..4d8f173b 100755 --- a/src/main/java/com/epam/aidial/deployment/manager/service/deployment/DeploymentService.java +++ b/src/main/java/com/epam/aidial/deployment/manager/service/deployment/DeploymentService.java @@ -415,9 +415,7 @@ private static boolean isApplicableForRollingUpdate(Deployment existing, Deploym || specializedUpdate || !Objects.equals(existing.getImageDefinitionId(), updated.getImageDefinitionId()) || !Objects.equals(existing.getContainerPort(), updated.getContainerPort()) - || !Objects.equals(existing.getInitialScale(), updated.getInitialScale()) - || !Objects.equals(existing.getMinScale(), updated.getMinScale()) - || !Objects.equals(existing.getMaxScale(), updated.getMaxScale()) + || !Objects.equals(existing.getScaling(), updated.getScaling()) || !Objects.equals(existing.getResources(), updated.getResources()); } diff --git a/src/main/java/com/epam/aidial/deployment/manager/service/deployment/KnativeDeploymentManager.java b/src/main/java/com/epam/aidial/deployment/manager/service/deployment/KnativeDeploymentManager.java index 52d9fb01..28c0ad5d 100755 --- a/src/main/java/com/epam/aidial/deployment/manager/service/deployment/KnativeDeploymentManager.java +++ b/src/main/java/com/epam/aidial/deployment/manager/service/deployment/KnativeDeploymentManager.java @@ -117,9 +117,7 @@ protected Service prepareServiceSpec(Deployment deployment) { userDefinedSensitiveEnvs, userDefinedSensitiveFileEnvs, imageDefinition.getImageName(), - deployment.getInitialScale(), - deployment.getMinScale(), - deployment.getMaxScale(), + deployment.getScaling(), deployment.getResources(), containerPort, deployment.getProbeProperties()); diff --git a/src/main/java/com/epam/aidial/deployment/manager/service/manifest/DeployableManifestGenerator.java b/src/main/java/com/epam/aidial/deployment/manager/service/manifest/DeployableManifestGenerator.java index ddd74116..60941fd7 100644 --- a/src/main/java/com/epam/aidial/deployment/manager/service/manifest/DeployableManifestGenerator.java +++ b/src/main/java/com/epam/aidial/deployment/manager/service/manifest/DeployableManifestGenerator.java @@ -1,6 +1,7 @@ package com.epam.aidial.deployment.manager.service.manifest; import com.epam.aidial.deployment.manager.configuration.AppProperties; +import com.epam.aidial.deployment.manager.model.ScalingStrategyType; import com.epam.aidial.deployment.manager.model.SensitiveEnvVar; import com.epam.aidial.deployment.manager.model.SimpleEnvVar; import com.epam.aidial.deployment.manager.utils.mapping.ListMapper; @@ -14,6 +15,9 @@ public abstract class DeployableManifestGenerator extends BaseManifestGenerator { + protected static final List SUPPORTED_SCALING_STRATEGIES = + List.of(ScalingStrategyType.ACTIVE_REQUESTS); + public DeployableManifestGenerator(AppProperties appconfig) { super(appconfig); } diff --git a/src/main/java/com/epam/aidial/deployment/manager/service/manifest/InferenceManifestGenerator.java b/src/main/java/com/epam/aidial/deployment/manager/service/manifest/InferenceManifestGenerator.java index 95bb5736..92921481 100644 --- a/src/main/java/com/epam/aidial/deployment/manager/service/manifest/InferenceManifestGenerator.java +++ b/src/main/java/com/epam/aidial/deployment/manager/service/manifest/InferenceManifestGenerator.java @@ -180,17 +180,8 @@ private void applyScaling(String name, var annotations = config.get(InferenceMappers.SERVICE_METADATA_FIELD) .get(InferenceMappers.METADATA_ANNOTATIONS_FIELD).data(); annotations.put("autoscaling.knative.dev/initial-scale", String.valueOf(initialScale)); - log.trace("Set annotation autoscaling.knative.dev/initial-scale={} for model '{}'", initialScale, name); - - if (scaling.getStrategy().getType() == ScalingStrategyType.ACTIVE_REQUESTS) { - predictor.setScaleMetric(Predictor.ScaleMetric.CONCURRENCY); - predictor.setScaleTarget(scaling.getStrategy().getThreshold()); - log.trace("Applied strategy ACTIVE_REQUESTS: metric={}, target={} for model '{}'", - Predictor.ScaleMetric.CONCURRENCY, scaling.getStrategy().getThreshold(), name); - } else { - throw new IllegalArgumentException("Scaling strategy '%s' is not supported. Supported strategies: %s" - .formatted(scaling.getStrategy().getType(), List.of(ScalingStrategyType.ACTIVE_REQUESTS))); - } + log.trace("Set min-scale={}, max-scale={}, initial-scale={} for Inference deployment '{}'", + scaling.getMinReplicas(), scaling.getMaxReplicas(), initialScale, name); if (scaling.getScaleToZeroDelaySeconds() != null) { var delay = scaling.getScaleToZeroDelaySeconds(); @@ -199,6 +190,20 @@ private void applyScaling(String name, log.trace("Set annotation autoscaling.knative.dev/scale-to-zero-pod-retention-period={} for model '{}'", delayStr, name); } + + if (scaling.getStrategy() == null) { + return; + } + + if (scaling.getStrategy().getType() == ScalingStrategyType.ACTIVE_REQUESTS) { + predictor.setScaleMetric(Predictor.ScaleMetric.CONCURRENCY); + predictor.setScaleTarget(scaling.getStrategy().getThreshold()); + log.trace("Applied strategy ACTIVE_REQUESTS: target={} for model '{}'", + scaling.getStrategy().getThreshold(), name); + } else { + throw new IllegalArgumentException("Scaling strategy '%s' is not supported. Supported strategies: %s" + .formatted(scaling.getStrategy().getType(), SUPPORTED_SCALING_STRATEGIES)); + } } } diff --git a/src/main/java/com/epam/aidial/deployment/manager/service/manifest/KnativeManifestGenerator.java b/src/main/java/com/epam/aidial/deployment/manager/service/manifest/KnativeManifestGenerator.java index f7e404de..27f31ac0 100644 --- a/src/main/java/com/epam/aidial/deployment/manager/service/manifest/KnativeManifestGenerator.java +++ b/src/main/java/com/epam/aidial/deployment/manager/service/manifest/KnativeManifestGenerator.java @@ -3,6 +3,8 @@ import com.epam.aidial.deployment.manager.configuration.AppProperties; import com.epam.aidial.deployment.manager.configuration.logging.LogExecution; import com.epam.aidial.deployment.manager.model.Resources; +import com.epam.aidial.deployment.manager.model.Scaling; +import com.epam.aidial.deployment.manager.model.ScalingStrategyType; import com.epam.aidial.deployment.manager.model.SensitiveEnvVar; import com.epam.aidial.deployment.manager.model.SensitiveFileEnvVar; import com.epam.aidial.deployment.manager.model.SimpleEnvVar; @@ -30,7 +32,6 @@ import org.springframework.beans.factory.annotation.Value; import org.springframework.stereotype.Component; -import java.util.HashMap; import java.util.List; @Slf4j @@ -58,9 +59,7 @@ public Service serviceConfig( List sensitiveEnv, List sensitiveFileEnvs, String imageName, - @Nullable Integer initScale, - @Nullable Integer minScale, - @Nullable Integer maxScale, + @Nullable Scaling scaling, Resources resources, @Nullable Integer containerPort, @Nullable ProbeProperties probeProperties @@ -74,9 +73,9 @@ public Service serviceConfig( var template = config.get(KnativeMappers.SERVICE_SPEC_FIELD) .get(KnativeMappers.SERVICE_TEMPLATE_FIELD); - configureAnnotations(template, initScale, minScale, maxScale, probeProperties); - var revisionSpecChain = template.get(KnativeMappers.SERVICE_TEMPLATE_SPEC_FIELD); + applyScaling(name, scaling, template, revisionSpecChain); + applyProgressDeadline(probeProperties, template); var containerChain = revisionSpecChain .getList(KnativeMappers.TEMPLATE_CONTAINERS_FIELD, Mappers.CONTAINER_NAME) .getOrDefault(appConfig.getKnativeServiceContainerConfig().getName(), appConfig::cloneKnativeServiceContainer); @@ -158,34 +157,56 @@ private void addSecretVolumesAndMountsAndApplySensitiveFileEnv( envVarChain.data().setValue(filePathToVolume); } - private void configureAnnotations( - MappingChain template, - @Nullable Integer initScale, - @Nullable Integer minScale, - @Nullable Integer maxScale, - @Nullable ProbeProperties probeProperties - ) { - var templateMetadata = template.get(KnativeMappers.SERVICE_TEMPLATE_METADATA_FIELD).data(); - var annotations = (templateMetadata.getAnnotations() != null) - ? templateMetadata.getAnnotations() - : new HashMap(); + private void applyScaling(String name, + @Nullable Scaling scaling, + MappingChain template, + MappingChain revisionSpecChain) { + log.debug("Applying scaling for Knative deployment '{}': {}", name, scaling); + if (scaling == null) { + return; + } - if (initScale != null) { - annotations.put("autoscaling.knative.dev/initial-scale", String.valueOf(initScale)); + var annotations = template.get(KnativeMappers.SERVICE_TEMPLATE_METADATA_FIELD) + .get(KnativeMappers.METADATA_ANNOTATIONS_FIELD).data(); + + var initialScale = Math.max(scaling.getMinReplicas(), 1); + annotations.put("autoscaling.knative.dev/initial-scale", String.valueOf(initialScale)); + annotations.put("autoscaling.knative.dev/min-scale", String.valueOf(scaling.getMinReplicas())); + annotations.put("autoscaling.knative.dev/max-scale", String.valueOf(scaling.getMaxReplicas())); + log.trace("Set min-scale={}, max-scale={}, initial-scale={} for Knative deployment '{}'", + scaling.getMinReplicas(), scaling.getMaxReplicas(), initialScale, name); + + if (scaling.getScaleToZeroDelaySeconds() != null) { + var delayStr = scaling.getScaleToZeroDelaySeconds() + "s"; + annotations.put("autoscaling.knative.dev/scale-to-zero-pod-retention-period", delayStr); + log.trace("Set annotation autoscaling.knative.dev/scale-to-zero-pod-retention-period={} for Knative deployment '{}'", + delayStr, name); } - if (minScale != null) { - annotations.put("autoscaling.knative.dev/min-scale", String.valueOf(minScale)); + + if (scaling.getStrategy() == null) { + return; } - if (maxScale != null) { - annotations.put("autoscaling.knative.dev/max-scale", String.valueOf(maxScale)); + + if (scaling.getStrategy().getType() == ScalingStrategyType.ACTIVE_REQUESTS) { + var target = scaling.getStrategy().getThreshold(); + revisionSpecChain.data().setContainerConcurrency((long) target); + annotations.put("autoscaling.knative.dev/target", String.valueOf(target)); + log.trace("Applied strategy ACTIVE_REQUESTS: target={} for deployment '{}'", + scaling.getStrategy().getThreshold(), name); + } else { + throw new IllegalArgumentException("Scaling strategy '%s' is not supported. Supported strategies: %s" + .formatted(scaling.getStrategy().getType(), SUPPORTED_SCALING_STRATEGIES)); } + } + private void applyProgressDeadline(@Nullable ProbeProperties probeProperties, + MappingChain template) { var progressDeadline = progressDeadlineCalculator.compute(probeProperties); if (progressDeadline != null) { + var annotations = template.get(KnativeMappers.SERVICE_TEMPLATE_METADATA_FIELD) + .get(KnativeMappers.METADATA_ANNOTATIONS_FIELD).data(); annotations.put("serving.knative.dev/progress-deadline", progressDeadline); } - - templateMetadata.setAnnotations(annotations); } private EnvVarSource buildKnativeSecretRef(SensitiveEnvVar env) { diff --git a/src/main/java/com/epam/aidial/deployment/manager/utils/mapping/KnativeMappers.java b/src/main/java/com/epam/aidial/deployment/manager/utils/mapping/KnativeMappers.java index d937139e..f68b99aa 100644 --- a/src/main/java/com/epam/aidial/deployment/manager/utils/mapping/KnativeMappers.java +++ b/src/main/java/com/epam/aidial/deployment/manager/utils/mapping/KnativeMappers.java @@ -10,7 +10,9 @@ import lombok.experimental.UtilityClass; import java.util.ArrayList; +import java.util.HashMap; import java.util.List; +import java.util.Map; @UtilityClass public class KnativeMappers { @@ -35,6 +37,11 @@ public class KnativeMappers { RevisionTemplateSpec::getMetadata, RevisionTemplateSpec::setMetadata); + public static final FieldMapper> METADATA_ANNOTATIONS_FIELD = new FieldMapper<>( + HashMap::new, + ObjectMeta::getAnnotations, + ObjectMeta::setAnnotations); + public static final FieldMapper SERVICE_TEMPLATE_SPEC_FIELD = new FieldMapper<>( RevisionSpec::new, RevisionTemplateSpec::getSpec, diff --git a/src/main/java/com/epam/aidial/deployment/manager/web/dto/DeploymentInfoDto.java b/src/main/java/com/epam/aidial/deployment/manager/web/dto/DeploymentInfoDto.java index 52df471e..18beef6d 100644 --- a/src/main/java/com/epam/aidial/deployment/manager/web/dto/DeploymentInfoDto.java +++ b/src/main/java/com/epam/aidial/deployment/manager/web/dto/DeploymentInfoDto.java @@ -18,9 +18,6 @@ public record DeploymentInfoDto( @NotNull String displayName, @Nullable String description, @Nullable String author, - @Nullable Integer initialScale, - @Nullable Integer minScale, - @Nullable Integer maxScale, @Nullable ResourcesDto resources, @NotNull DeploymentStatusDto status, @Nullable String url, diff --git a/src/main/java/com/epam/aidial/deployment/manager/web/dto/ScalingDto.java b/src/main/java/com/epam/aidial/deployment/manager/web/dto/ScalingDto.java index c2f76ce2..2eee3ea4 100644 --- a/src/main/java/com/epam/aidial/deployment/manager/web/dto/ScalingDto.java +++ b/src/main/java/com/epam/aidial/deployment/manager/web/dto/ScalingDto.java @@ -3,7 +3,6 @@ import com.epam.aidial.deployment.manager.web.validation.ValidScaling; import jakarta.validation.Valid; import jakarta.validation.constraints.Min; -import jakarta.validation.constraints.NotNull; import lombok.AllArgsConstructor; import lombok.Data; import lombok.NoArgsConstructor; @@ -20,6 +19,6 @@ public class ScalingDto { private int maxReplicas; @Nullable @Min(1) private Integer scaleToZeroDelaySeconds; - @NotNull @Valid + @Nullable @Valid private ScalingStrategyDto strategy; } diff --git a/src/main/java/com/epam/aidial/deployment/manager/web/dto/deployment/CreateDeploymentRequestDto.java b/src/main/java/com/epam/aidial/deployment/manager/web/dto/deployment/CreateDeploymentRequestDto.java index 0661cc99..32b20e83 100644 --- a/src/main/java/com/epam/aidial/deployment/manager/web/dto/deployment/CreateDeploymentRequestDto.java +++ b/src/main/java/com/epam/aidial/deployment/manager/web/dto/deployment/CreateDeploymentRequestDto.java @@ -2,10 +2,10 @@ import com.epam.aidial.deployment.manager.web.dto.DeploymentMetadataDto; import com.epam.aidial.deployment.manager.web.dto.ResourcesDto; +import com.epam.aidial.deployment.manager.web.dto.ScalingDto; import com.epam.aidial.deployment.manager.web.dto.probe.ProbePropertiesDto; import com.epam.aidial.deployment.manager.web.validation.ValidDomainList; import com.epam.aidial.deployment.manager.web.validation.ValidResources; -import com.epam.aidial.deployment.manager.web.validation.ValidScaleConfiguration; import com.fasterxml.jackson.annotation.JsonSubTypes; import com.fasterxml.jackson.annotation.JsonTypeInfo; import jakarta.validation.Valid; @@ -33,7 +33,6 @@ @JsonSubTypes.Type(value = CreateNimDeploymentRequestDto.class, name = "nim"), @JsonSubTypes.Type(value = CreateInferenceDeploymentRequestDto.class, name = "inference"), }) -@ValidScaleConfiguration @Data @NoArgsConstructor @AllArgsConstructor @@ -51,11 +50,8 @@ public abstract class CreateDeploymentRequestDto { @Valid private DeploymentMetadataDto metadata; @Nullable - private Integer initialScale; - @Nullable - private Integer minScale; - @Nullable - private Integer maxScale; + @Valid + private ScalingDto scaling; @Nullable @ValidResources private ResourcesDto resources; diff --git a/src/main/java/com/epam/aidial/deployment/manager/web/dto/deployment/CreateInferenceDeploymentRequestDto.java b/src/main/java/com/epam/aidial/deployment/manager/web/dto/deployment/CreateInferenceDeploymentRequestDto.java index 9ef8a0a2..4a2c716e 100644 --- a/src/main/java/com/epam/aidial/deployment/manager/web/dto/deployment/CreateInferenceDeploymentRequestDto.java +++ b/src/main/java/com/epam/aidial/deployment/manager/web/dto/deployment/CreateInferenceDeploymentRequestDto.java @@ -1,6 +1,5 @@ package com.epam.aidial.deployment.manager.web.dto.deployment; -import com.epam.aidial.deployment.manager.web.dto.ScalingDto; import jakarta.validation.Valid; import jakarta.validation.constraints.NotNull; import lombok.AllArgsConstructor; @@ -25,7 +24,5 @@ public class CreateInferenceDeploymentRequestDto extends CreateDeploymentRequest private String command; @Nullable private String args; - @Nullable @Valid - private ScalingDto scaling; } diff --git a/src/main/java/com/epam/aidial/deployment/manager/web/dto/deployment/DeploymentDto.java b/src/main/java/com/epam/aidial/deployment/manager/web/dto/deployment/DeploymentDto.java index 8135ae93..8ed6d8b2 100644 --- a/src/main/java/com/epam/aidial/deployment/manager/web/dto/deployment/DeploymentDto.java +++ b/src/main/java/com/epam/aidial/deployment/manager/web/dto/deployment/DeploymentDto.java @@ -3,6 +3,7 @@ import com.epam.aidial.deployment.manager.web.dto.DeploymentMetadataDto; import com.epam.aidial.deployment.manager.web.dto.DeploymentStatusDto; import com.epam.aidial.deployment.manager.web.dto.ResourcesDto; +import com.epam.aidial.deployment.manager.web.dto.ScalingDto; import com.epam.aidial.deployment.manager.web.dto.probe.ProbePropertiesDto; import com.fasterxml.jackson.annotation.JsonSubTypes; import com.fasterxml.jackson.annotation.JsonTypeInfo; @@ -41,11 +42,8 @@ public abstract class DeploymentDto { @Valid private DeploymentMetadataDto metadata; @Nullable - private Integer initialScale; - @Nullable - private Integer minScale; - @Nullable - private Integer maxScale; + @Valid + private ScalingDto scaling; @Nullable private ResourcesDto resources; @Nullable diff --git a/src/main/java/com/epam/aidial/deployment/manager/web/dto/deployment/InferenceDeploymentDto.java b/src/main/java/com/epam/aidial/deployment/manager/web/dto/deployment/InferenceDeploymentDto.java index 83ce4906..709ee9a3 100644 --- a/src/main/java/com/epam/aidial/deployment/manager/web/dto/deployment/InferenceDeploymentDto.java +++ b/src/main/java/com/epam/aidial/deployment/manager/web/dto/deployment/InferenceDeploymentDto.java @@ -1,6 +1,5 @@ package com.epam.aidial.deployment.manager.web.dto.deployment; -import com.epam.aidial.deployment.manager.web.dto.ScalingDto; import jakarta.validation.Valid; import jakarta.validation.constraints.NotNull; import lombok.AllArgsConstructor; @@ -24,6 +23,4 @@ public class InferenceDeploymentDto extends DeploymentDto { private String command; @Nullable private String args; - @Nullable @Valid - private ScalingDto scaling; } diff --git a/src/main/java/com/epam/aidial/deployment/manager/web/mapper/DeploymentDtoMapper.java b/src/main/java/com/epam/aidial/deployment/manager/web/mapper/DeploymentDtoMapper.java index f309fd9e..e7613625 100644 --- a/src/main/java/com/epam/aidial/deployment/manager/web/mapper/DeploymentDtoMapper.java +++ b/src/main/java/com/epam/aidial/deployment/manager/web/mapper/DeploymentDtoMapper.java @@ -81,7 +81,7 @@ @Slf4j @Mapper( componentModel = "spring", - uses = {EnvVarValueDtoMapper.class, ProbePropertiesDtoMapper.class}, + uses = {EnvVarValueDtoMapper.class, ProbePropertiesDtoMapper.class, ScalingDtoMapper.class}, subclassExhaustiveStrategy = SubclassExhaustiveStrategy.RUNTIME_EXCEPTION ) public abstract class DeploymentDtoMapper { diff --git a/src/main/java/com/epam/aidial/deployment/manager/web/mapper/ScalingDtoMapper.java b/src/main/java/com/epam/aidial/deployment/manager/web/mapper/ScalingDtoMapper.java new file mode 100644 index 00000000..b6de1e05 --- /dev/null +++ b/src/main/java/com/epam/aidial/deployment/manager/web/mapper/ScalingDtoMapper.java @@ -0,0 +1,25 @@ +package com.epam.aidial.deployment.manager.web.mapper; + +import com.epam.aidial.deployment.manager.model.Scaling; +import com.epam.aidial.deployment.manager.model.ScalingStrategy; +import com.epam.aidial.deployment.manager.model.ScalingStrategyType; +import com.epam.aidial.deployment.manager.web.dto.ScalingDto; +import com.epam.aidial.deployment.manager.web.dto.ScalingStrategyDto; +import com.epam.aidial.deployment.manager.web.dto.ScalingStrategyTypeDto; +import org.mapstruct.Mapper; + +@Mapper(componentModel = "spring") +public interface ScalingDtoMapper { + + Scaling toScaling(ScalingDto dto); + + ScalingDto toScalingDto(Scaling model); + + ScalingStrategy toScalingStrategy(ScalingStrategyDto dto); + + ScalingStrategyDto toScalingStrategyDto(ScalingStrategy model); + + ScalingStrategyType toScalingStrategyType(ScalingStrategyTypeDto dto); + + ScalingStrategyTypeDto toScalingStrategyTypeDto(ScalingStrategyType model); +} diff --git a/src/main/java/com/epam/aidial/deployment/manager/web/validation/ScaleConfigurationValidator.java b/src/main/java/com/epam/aidial/deployment/manager/web/validation/ScaleConfigurationValidator.java deleted file mode 100644 index e7e15ab9..00000000 --- a/src/main/java/com/epam/aidial/deployment/manager/web/validation/ScaleConfigurationValidator.java +++ /dev/null @@ -1,81 +0,0 @@ -package com.epam.aidial.deployment.manager.web.validation; - -import com.epam.aidial.deployment.manager.web.dto.deployment.CreateDeploymentRequestDto; -import jakarta.validation.ConstraintValidator; -import jakarta.validation.ConstraintValidatorContext; -import lombok.extern.slf4j.Slf4j; - -@Slf4j -public class ScaleConfigurationValidator implements ConstraintValidator { - - private static final int MIN_ALLOWED_SCALE = 0; - private static final int MAX_ALLOWED_SCALE = 10; - - @Override - public boolean isValid(CreateDeploymentRequestDto value, ConstraintValidatorContext context) { - if (value == null) { - return true; - } - - Integer minScale = value.getMinScale(); - Integer initialScale = value.getInitialScale(); - Integer maxScale = value.getMaxScale(); - - log.debug("Validating scale configuration: minScale={}, initialScale={}, maxScale={}", minScale, initialScale, maxScale); - - // If all are null, treat as not specified and valid. - if (minScale == null && initialScale == null && maxScale == null) { - return true; - } - - boolean valid = true; - context.disableDefaultConstraintViolation(); - - // Range checks - if (!isWithinAllowedRange(minScale)) { - addViolation(context, "minScale must be between %d and %d".formatted(MIN_ALLOWED_SCALE, MAX_ALLOWED_SCALE)); - valid = false; - } - - if (!isWithinAllowedRange(initialScale)) { - addViolation(context, "initialScale must be between %d and %d".formatted(MIN_ALLOWED_SCALE, MAX_ALLOWED_SCALE)); - valid = false; - } - - if (!isWithinAllowedRange(maxScale)) { - addViolation(context, "maxScale must be between %d and %d".formatted(MIN_ALLOWED_SCALE, MAX_ALLOWED_SCALE)); - valid = false; - } - - // Ordering checks (only where both sides are provided) - if (minScale != null && initialScale != null && minScale > initialScale) { - addViolation(context, "minScale must be less than or equal to initialScale"); - valid = false; - } - - if (initialScale != null && maxScale != null && initialScale > maxScale) { - addViolation(context, "initialScale must be less than or equal to maxScale"); - valid = false; - } - - if (minScale != null && maxScale != null && minScale > maxScale) { - addViolation(context, "minScale must be less than or equal to maxScale"); - valid = false; - } - - return valid; - } - - private boolean isWithinAllowedRange(Integer value) { - if (value == null) { - return true; - } - return value >= MIN_ALLOWED_SCALE && value <= MAX_ALLOWED_SCALE; - } - - private void addViolation(ConstraintValidatorContext context, String message) { - context.buildConstraintViolationWithTemplate(message) - .addConstraintViolation(); - } -} - diff --git a/src/main/java/com/epam/aidial/deployment/manager/web/validation/ScalingValidator.java b/src/main/java/com/epam/aidial/deployment/manager/web/validation/ScalingValidator.java index 5498632a..ad28126b 100644 --- a/src/main/java/com/epam/aidial/deployment/manager/web/validation/ScalingValidator.java +++ b/src/main/java/com/epam/aidial/deployment/manager/web/validation/ScalingValidator.java @@ -26,6 +26,27 @@ public boolean isValid(ScalingDto value, ConstraintValidatorContext context) { return false; } + if (minReplicas == maxReplicas && value.getStrategy() != null) { + context.disableDefaultConstraintViolation(); + context.buildConstraintViolationWithTemplate("strategy must be null when minReplicas equals maxReplicas") + .addConstraintViolation(); + return false; + } + + if (minReplicas == 0 && maxReplicas == 1 && value.getStrategy() != null) { + context.disableDefaultConstraintViolation(); + context.buildConstraintViolationWithTemplate("strategy must be null when minReplicas is 0 and maxReplicas is 1") + .addConstraintViolation(); + return false; + } + + if (minReplicas != maxReplicas && !(minReplicas == 0 && maxReplicas == 1) && value.getStrategy() == null) { + context.disableDefaultConstraintViolation(); + context.buildConstraintViolationWithTemplate("strategy must not be null when minReplicas does not equal maxReplicas") + .addConstraintViolation(); + return false; + } + return true; } } diff --git a/src/main/java/com/epam/aidial/deployment/manager/web/validation/ValidScaleConfiguration.java b/src/main/java/com/epam/aidial/deployment/manager/web/validation/ValidScaleConfiguration.java deleted file mode 100644 index 8e376948..00000000 --- a/src/main/java/com/epam/aidial/deployment/manager/web/validation/ValidScaleConfiguration.java +++ /dev/null @@ -1,29 +0,0 @@ -package com.epam.aidial.deployment.manager.web.validation; - -import jakarta.validation.Constraint; -import jakarta.validation.Payload; - -import java.lang.annotation.Documented; -import java.lang.annotation.ElementType; -import java.lang.annotation.Retention; -import java.lang.annotation.RetentionPolicy; -import java.lang.annotation.Target; - -/** - * Validates that scale-related fields on {@code CreateDeploymentRequestDto} - * satisfy the following invariant where values are provided: - * 0 <= minScale <= initialScale <= maxScale <= 10. - */ -@Documented -@Constraint(validatedBy = ScaleConfigurationValidator.class) -@Target(ElementType.TYPE) -@Retention(RetentionPolicy.RUNTIME) -public @interface ValidScaleConfiguration { - - String message() default "Scale values must satisfy 0 <= minScale <= initialScale <= maxScale <= 10"; - - Class[] groups() default {}; - - Class[] payload() default {}; -} - diff --git a/src/main/resources/application.yml b/src/main/resources/application.yml index b1ee6017..37b26e31 100644 --- a/src/main/resources/application.yml +++ b/src/main/resources/application.yml @@ -161,7 +161,7 @@ app: annotations: "[autoscaling.knative.dev/initial-scale]": ${KNATIVE_SERVICE_DEFAULT_INITIAL_SCALE:1} "[autoscaling.knative.dev/min-scale]": ${KNATIVE_SERVICE_DEFAULT_MIN_SCALE:0} - "[autoscaling.knative.dev/max-scale]": ${KNATIVE_SERVICE_DEFAULT_MAX_SCALE:3} + "[autoscaling.knative.dev/max-scale]": ${KNATIVE_SERVICE_DEFAULT_MAX_SCALE:1} "[autoscaling.knative.dev/window]": ${KNATIVE_SERVICE_DEFAULT_WINDOW:300s} spec: idleTimeoutSeconds: 300 diff --git a/src/main/resources/db/migration/H2/V1.47__AddScalingToDeploymentTable.sql b/src/main/resources/db/migration/H2/V1.47__AddScalingToDeploymentTable.sql new file mode 100644 index 00000000..ec40ab7e --- /dev/null +++ b/src/main/resources/db/migration/H2/V1.47__AddScalingToDeploymentTable.sql @@ -0,0 +1,13 @@ +-- Add scaling column to base deployment table +ALTER TABLE deployment ADD COLUMN scaling JSON; + +-- Migrate existing inference scaling data to base deployment table +UPDATE deployment d SET scaling = (SELECT scaling FROM inference_deployment c WHERE c.id = d.id) WHERE d.id IN (SELECT id FROM inference_deployment WHERE scaling IS NOT NULL); + +-- Drop scaling column from inference_deployment (added in V1.43) +ALTER TABLE inference_deployment DROP COLUMN scaling; + +-- Drop deprecated scale columns from deployment table +ALTER TABLE deployment DROP COLUMN initial_scale; +ALTER TABLE deployment DROP COLUMN min_scale; +ALTER TABLE deployment DROP COLUMN max_scale; diff --git a/src/main/resources/db/migration/MS_SQL_SERVER/V1.47__AddScalingToDeploymentTable.sql b/src/main/resources/db/migration/MS_SQL_SERVER/V1.47__AddScalingToDeploymentTable.sql new file mode 100644 index 00000000..b4b1ff5c --- /dev/null +++ b/src/main/resources/db/migration/MS_SQL_SERVER/V1.47__AddScalingToDeploymentTable.sql @@ -0,0 +1,28 @@ +-- Add scaling column to base deployment table +ALTER TABLE deployment ADD scaling VARCHAR(MAX); +go + +ALTER TABLE deployment + ADD CONSTRAINT chk_deployment_scaling_is_json CHECK (scaling IS NULL OR isjson(scaling) > 0); +go + +-- Migrate existing inference scaling data to base deployment table +UPDATE d SET d.scaling = c.scaling FROM deployment d INNER JOIN inference_deployment c ON c.id = d.id WHERE c.scaling IS NOT NULL; +go + +-- Drop constraint and column from inference_deployment (added in V1.43) +ALTER TABLE inference_deployment DROP CONSTRAINT chk_inference_deployment_scaling_is_json; +go + +ALTER TABLE inference_deployment DROP COLUMN scaling; +go + +-- Drop deprecated scale columns from deployment table +ALTER TABLE deployment DROP COLUMN initial_scale; +go + +ALTER TABLE deployment DROP COLUMN min_scale; +go + +ALTER TABLE deployment DROP COLUMN max_scale; +go diff --git a/src/main/resources/db/migration/POSTGRES/V1.47__AddScalingToDeploymentTable.sql b/src/main/resources/db/migration/POSTGRES/V1.47__AddScalingToDeploymentTable.sql new file mode 100644 index 00000000..f7ab5b81 --- /dev/null +++ b/src/main/resources/db/migration/POSTGRES/V1.47__AddScalingToDeploymentTable.sql @@ -0,0 +1,13 @@ +-- Add scaling column to base deployment table +ALTER TABLE deployment ADD COLUMN scaling JSONB; + +-- Migrate existing inference scaling data to base deployment table +UPDATE deployment d SET scaling = c.scaling FROM inference_deployment c WHERE c.id = d.id AND c.scaling IS NOT NULL; + +-- Drop scaling column from inference_deployment (added in V1.43) +ALTER TABLE inference_deployment DROP COLUMN scaling; + +-- Drop deprecated scale columns from deployment table +ALTER TABLE deployment DROP COLUMN initial_scale; +ALTER TABLE deployment DROP COLUMN min_scale; +ALTER TABLE deployment DROP COLUMN max_scale; diff --git a/src/test/java/com/epam/aidial/deployment/manager/dao/repository/DeploymentRepositoryTest.java b/src/test/java/com/epam/aidial/deployment/manager/dao/repository/DeploymentRepositoryTest.java index a2fb104c..d0188c6d 100755 --- a/src/test/java/com/epam/aidial/deployment/manager/dao/repository/DeploymentRepositoryTest.java +++ b/src/test/java/com/epam/aidial/deployment/manager/dao/repository/DeploymentRepositoryTest.java @@ -398,9 +398,6 @@ private DeploymentEntity createDeploymentEntity(String deploymentId, UUID imageD deploymentEntity.setDisplayName("test-deployment"); deploymentEntity.setDescription("Test Description"); deploymentEntity.setEnvs(persistenceEnvs); - deploymentEntity.setInitialScale(1); - deploymentEntity.setMinScale(1); - deploymentEntity.setMaxScale(3); deploymentEntity.setResources(persistenceResources); deploymentEntity.setStatus(PersistenceDeploymentStatus.RUNNING); deploymentEntity.setUrl("http://test-deployment.url"); @@ -428,9 +425,6 @@ private Deployment createDeployment(String deploymentId, UUID imageDefinitionId) .displayName("test-deployment") .description("Test Description") .envs(envs) - .initialScale(1) - .minScale(1) - .maxScale(3) .resources(resources) .status(DeploymentStatus.RUNNING) .url("http://test-deployment.url") diff --git a/src/test/java/com/epam/aidial/deployment/manager/functional/tests/ConfigExportImportFunctionalTest.java b/src/test/java/com/epam/aidial/deployment/manager/functional/tests/ConfigExportImportFunctionalTest.java index a36cfad2..7b7ad2f0 100644 --- a/src/test/java/com/epam/aidial/deployment/manager/functional/tests/ConfigExportImportFunctionalTest.java +++ b/src/test/java/com/epam/aidial/deployment/manager/functional/tests/ConfigExportImportFunctionalTest.java @@ -19,6 +19,9 @@ import com.epam.aidial.deployment.manager.model.McpTransport; import com.epam.aidial.deployment.manager.model.McpTransportType; import com.epam.aidial.deployment.manager.model.Resources; +import com.epam.aidial.deployment.manager.model.Scaling; +import com.epam.aidial.deployment.manager.model.ScalingStrategy; +import com.epam.aidial.deployment.manager.model.ScalingStrategyType; import com.epam.aidial.deployment.manager.model.SimpleEnvVarValue; import com.epam.aidial.deployment.manager.model.config.ExportConfigComponent; import com.epam.aidial.deployment.manager.model.config.ExportConfigComponentType; @@ -245,9 +248,7 @@ private static List buildExportDeployments() { .displayName("MCP deployment export test") .description("MCP deployment for import test") .metadata(mcpDeploymentMetadata()) - .initialScale(1) - .minScale(0) - .maxScale(5) + .scaling(new Scaling(0, 5, 300, new ScalingStrategy(ScalingStrategyType.PENDING_REQUESTS, 10))) .resources(EMPTY_RESOURCES) .probeProperties(probeTcp8080) .containerPort(8080) @@ -264,9 +265,7 @@ private static List buildExportDeployments() { .displayName("Adapter deployment export test") .description("Adapter deployment for import test") .metadata(adapterDeploymentMetadata()) - .initialScale(1) - .minScale(0) - .maxScale(5) + .scaling(new Scaling(0, 5, null, new ScalingStrategy(ScalingStrategyType.ACTIVE_REQUESTS, 50))) .resources(EMPTY_RESOURCES) .containerPort(5000) .allowedDomains(List.of("*")) @@ -280,9 +279,7 @@ private static List buildExportDeployments() { .displayName("Interceptor deployment export test") .description("Interceptor deployment for import test") .metadata(interceptorDeploymentMetadata()) - .initialScale(1) - .minScale(0) - .maxScale(5) + .scaling(new Scaling(0, 5, 600, null)) .resources(EMPTY_RESOURCES) .probeProperties(probeTcp8080) .containerPort(8080) @@ -295,9 +292,6 @@ private static List buildExportDeployments() { .displayName("NIM deployment export test") .description("NIM deployment for import test") .metadata(new DeploymentMetadata(List.of())) - .initialScale(1) - .minScale(0) - .maxScale(2) .resources(EMPTY_RESOURCES) .probeProperties(nimProbe) .containerPort(8000) @@ -313,9 +307,6 @@ private static List buildExportDeployments() { .description("Inference deployment for import test") .modelFormat("huggingface") .metadata(new DeploymentMetadata(List.of())) - .initialScale(1) - .minScale(0) - .maxScale(3) .resources(EMPTY_RESOURCES) .probeProperties(inferenceProbe) .containerPort(8080) @@ -468,9 +459,7 @@ private void assertDeploymentEquals(Deployment actual, Deployment expected) { Assertions.assertEquals(expected.getId(), actual.getId(), "id"); Assertions.assertEquals(expected.getDisplayName(), actual.getDisplayName(), "displayName"); Assertions.assertEquals(expected.getDescription(), actual.getDescription(), "description"); - Assertions.assertEquals(expected.getInitialScale(), actual.getInitialScale(), "initialScale"); - Assertions.assertEquals(expected.getMinScale(), actual.getMinScale(), "minScale"); - Assertions.assertEquals(expected.getMaxScale(), actual.getMaxScale(), "maxScale"); + Assertions.assertEquals(expected.getScaling(), actual.getScaling(), "scaling"); Assertions.assertEquals(expected.getContainerPort(), actual.getContainerPort(), "containerPort"); Assertions.assertEquals(expected.getResources(), actual.getResources(), "resources"); Assertions.assertEquals(expected.getAllowedDomains(), actual.getAllowedDomains(), "allowedDomains"); diff --git a/src/test/java/com/epam/aidial/deployment/manager/functional/tests/DeploymentFunctionalTest.java b/src/test/java/com/epam/aidial/deployment/manager/functional/tests/DeploymentFunctionalTest.java index 13aa4d9a..dc695720 100644 --- a/src/test/java/com/epam/aidial/deployment/manager/functional/tests/DeploymentFunctionalTest.java +++ b/src/test/java/com/epam/aidial/deployment/manager/functional/tests/DeploymentFunctionalTest.java @@ -123,9 +123,6 @@ public void shouldSuccessfullyCreateInterceptorDeployment() { Assertions.assertEquals(createDeployment.getImageDefinitionId(), deployment.getImageDefinitionId()); Assertions.assertEquals(imageDefinitionName, deployment.getImageDefinitionName()); Assertions.assertEquals(imageDefinitionVersion, deployment.getImageDefinitionVersion()); - Assertions.assertEquals(createDeployment.getMinScale(), deployment.getMinScale()); - Assertions.assertEquals(createDeployment.getMaxScale(), deployment.getMaxScale()); - Assertions.assertEquals(createDeployment.getInitialScale(), deployment.getInitialScale()); Assertions.assertEquals(createDeployment.getResources(), deployment.getResources()); Assertions.assertEquals(DeploymentStatus.NOT_DEPLOYED, deployment.getStatus()); assertEnvsAreEqual(expectedEnvVars, deployment.getEnvs()); @@ -153,9 +150,6 @@ public void shouldSuccessfullyCreateAdapterDeployment() { Assertions.assertEquals(createDeployment.getImageDefinitionId(), deployment.getImageDefinitionId()); Assertions.assertEquals(adapterImageDef.getName(), deployment.getImageDefinitionName()); Assertions.assertEquals(adapterImageDef.getVersion(), deployment.getImageDefinitionVersion()); - Assertions.assertEquals(createDeployment.getMinScale(), deployment.getMinScale()); - Assertions.assertEquals(createDeployment.getMaxScale(), deployment.getMaxScale()); - Assertions.assertEquals(createDeployment.getInitialScale(), deployment.getInitialScale()); Assertions.assertEquals(createDeployment.getResources(), deployment.getResources()); Assertions.assertEquals(DeploymentStatus.NOT_DEPLOYED, deployment.getStatus()); assertEnvsAreEqual(expectedEnvVars, deployment.getEnvs()); @@ -838,9 +832,6 @@ public void shouldSuccessfullyDuplicateDeployment() { Assertions.assertEquals("cloned-deployment", clonedDeployment.getDisplayName()); Assertions.assertEquals(originalDeployment.getDescription(), clonedDeployment.getDescription()); Assertions.assertEquals(originalDeployment.getImageDefinitionId(), clonedDeployment.getImageDefinitionId()); - Assertions.assertEquals(originalDeployment.getMinScale(), clonedDeployment.getMinScale()); - Assertions.assertEquals(originalDeployment.getMaxScale(), clonedDeployment.getMaxScale()); - Assertions.assertEquals(originalDeployment.getInitialScale(), clonedDeployment.getInitialScale()); Assertions.assertEquals(originalDeployment.getResources(), clonedDeployment.getResources()); Assertions.assertEquals(originalDeployment.getContainerPort(), clonedDeployment.getContainerPort()); Assertions.assertEquals(DeploymentStatus.NOT_DEPLOYED, clonedDeployment.getStatus()); diff --git a/src/test/java/com/epam/aidial/deployment/manager/functional/tests/FullWorkflowWithMockedK8sClientFunctionalTest.java b/src/test/java/com/epam/aidial/deployment/manager/functional/tests/FullWorkflowWithMockedK8sClientFunctionalTest.java index 65960edc..5358a27a 100644 --- a/src/test/java/com/epam/aidial/deployment/manager/functional/tests/FullWorkflowWithMockedK8sClientFunctionalTest.java +++ b/src/test/java/com/epam/aidial/deployment/manager/functional/tests/FullWorkflowWithMockedK8sClientFunctionalTest.java @@ -331,7 +331,7 @@ private static Service createKnativeService(String serviceName, String image, St Map annotations = new HashMap<>(); annotations.put("autoscaling.knative.dev/initial-scale", "1"); annotations.put("autoscaling.knative.dev/min-scale", "0"); - annotations.put("autoscaling.knative.dev/max-scale", "5"); + annotations.put("autoscaling.knative.dev/max-scale", "1"); annotations.put("autoscaling.knative.dev/window", "300s"); templateMeta.setAnnotations(annotations); diff --git a/src/test/java/com/epam/aidial/deployment/manager/functional/utils/FunctionalTestHelper.java b/src/test/java/com/epam/aidial/deployment/manager/functional/utils/FunctionalTestHelper.java index 2117037e..15435e3c 100644 --- a/src/test/java/com/epam/aidial/deployment/manager/functional/utils/FunctionalTestHelper.java +++ b/src/test/java/com/epam/aidial/deployment/manager/functional/utils/FunctionalTestHelper.java @@ -159,9 +159,6 @@ public static CreateDeployment createInterceptorDeploymentRequest(UUID imageDefi .imageDefinitionId(imageDefinitionId) .displayName("test-deployment") .description("Test deployment description") - .initialScale(1) - .minScale(0) - .maxScale(5) .resources(createResources()) .author("test-author") .metadata(createMetadata()) @@ -175,9 +172,6 @@ public static CreateDeployment createAdapterDeploymentRequest(UUID imageDefiniti .imageDefinitionId(imageDefinitionId) .displayName("test-adapter-deployment") .description("Test adapter deployment description") - .initialScale(1) - .minScale(0) - .maxScale(5) .resources(createResources()) .author("test-author") .metadata(createMetadata()) @@ -190,9 +184,6 @@ public static CreateDeployment createRealInterceptorDeploymentRequest(String nam .id(name) .displayName(name) .description("Test deployment description") - .initialScale(1) - .minScale(0) - .maxScale(5) .resources(createResources()) .author("test-author") .metadata(new DeploymentMetadata(envs)) @@ -207,9 +198,6 @@ public static CreateDeployment createMcpDeploymentRequest(UUID imageDefinitionId .imageDefinitionId(imageDefinitionId) .displayName("test-deployment") .description("Test deployment description") - .initialScale(1) - .minScale(0) - .maxScale(5) .resources(createResources()) .author("test-author") .metadata(createMetadata()) @@ -224,9 +212,6 @@ public static CreateDeployment createRealMcpDeploymentRequest(String name, List< .id(name) .displayName(name) .description("Test deployment description") - .initialScale(1) - .minScale(0) - .maxScale(5) .resources(createResources()) .author("test-author") .metadata(new DeploymentMetadata(envs)) diff --git a/src/test/java/com/epam/aidial/deployment/manager/service/deployment/KnativeDeploymentManagerTest.java b/src/test/java/com/epam/aidial/deployment/manager/service/deployment/KnativeDeploymentManagerTest.java index 9eca47a0..a8d22477 100644 --- a/src/test/java/com/epam/aidial/deployment/manager/service/deployment/KnativeDeploymentManagerTest.java +++ b/src/test/java/com/epam/aidial/deployment/manager/service/deployment/KnativeDeploymentManagerTest.java @@ -301,9 +301,7 @@ void deploy_shouldDeployKnativeService() { any(), any(), eq(IMAGE_NAME), - eq(deployment.getInitialScale()), - eq(deployment.getMinScale()), - eq(deployment.getMaxScale()), + any(), eq(deployment.getResources()), eq(containerPort), any() @@ -382,7 +380,7 @@ void deploy_shouldHandleExceptionDuringDeployment() { when(imageDefinitionService.getImageDefinition(IMAGE_DEFINITION_ID)).thenReturn(Optional.of(imageDefinition)); when(containerPortResolver.resolveContainerPort(any(), anyInt())).thenReturn(containerPort); when(knativeManifestGenerator.serviceConfig( - any(), any(), any(), any(), any(), any(), any(), any(), any(), anyInt(), any() + any(), any(), any(), any(), any(), any(), any(), any(), any() )).thenReturn(serviceSpec); when(ciliumNetworkPolicyCreator.isCiliumNetworkPoliciesEnabled()).thenReturn(true); when(ciliumNetworkPolicyCreator.create(eq(NAMESPACE), anyString(), anyString(), anyList(), eq(Set.of(containerPort)))).thenReturn(ciliumNetworkPolicy); diff --git a/src/test/java/com/epam/aidial/deployment/manager/service/manifest/KnativeManifestGeneratorTest.java b/src/test/java/com/epam/aidial/deployment/manager/service/manifest/KnativeManifestGeneratorTest.java index aa8b3bcb..7b65e458 100644 --- a/src/test/java/com/epam/aidial/deployment/manager/service/manifest/KnativeManifestGeneratorTest.java +++ b/src/test/java/com/epam/aidial/deployment/manager/service/manifest/KnativeManifestGeneratorTest.java @@ -2,6 +2,7 @@ import com.epam.aidial.deployment.manager.configuration.AppProperties; import com.epam.aidial.deployment.manager.model.Resources; +import com.epam.aidial.deployment.manager.model.Scaling; import com.epam.aidial.deployment.manager.model.SensitiveEnvVar; import com.epam.aidial.deployment.manager.model.SimpleEnvVar; import com.epam.aidial.deployment.manager.model.SimpleEnvVarValue; @@ -68,7 +69,7 @@ void testServiceConfig_withOverriddenEnvs() throws JsonProcessingException, JSON // When var generatedService = manifestGenerator.serviceConfig( deploymentName, simpleEnvs, sensitiveEnvs, Collections.emptyList(), imageName, - null, null, null, resources, null, null + null, resources, null, null ); // Then @@ -82,11 +83,12 @@ void testServiceConfig_withOverriddenScaling() throws JsonProcessingException, J // Given var deploymentName = "scaling-app"; var imageName = "my-registry/scaling-image:v1"; + var scaling = new Scaling(0, 10, null, null); // When var generatedService = manifestGenerator.serviceConfig( deploymentName, Collections.emptyList(), Collections.emptyList(), Collections.emptyList(), imageName, - 0, 0, 10, new Resources(), null, null + scaling, new Resources(), null, null ); // Then @@ -108,7 +110,7 @@ void testServiceConfig_withOverriddenResources() throws JsonProcessingException, // When var generatedService = manifestGenerator.serviceConfig( deploymentName, Collections.emptyList(), Collections.emptyList(), Collections.emptyList(), imageName, - null, null, null, resources, null, null + null, resources, null, null ); // Then @@ -127,7 +129,7 @@ public void testGenerateKnativeServiceWithContainerPort() { // When var generatedService = manifestGenerator.serviceConfig( deploymentName, Collections.emptyList(), Collections.emptyList(), Collections.emptyList(), imageName, - null, null, null, new Resources(), containerPort, null + null, new Resources(), containerPort, null ); // Then @@ -150,7 +152,7 @@ public void testGenerateKnativeServiceWithoutContainerPortWhenNull() { // When var generatedService = manifestGenerator.serviceConfig( deploymentName, Collections.emptyList(), Collections.emptyList(), Collections.emptyList(), imageName, - null, null, null, new Resources(), null, null + null, new Resources(), null, null ); // Then @@ -172,7 +174,7 @@ void testServiceConfig_withProbeProperties_setsProgressDeadlineAnnotation() { // When var generatedService = generatorWithRealConverter.serviceConfig( deploymentName, Collections.emptyList(), Collections.emptyList(), Collections.emptyList(), imageName, - null, null, null, new Resources(), null, probeProperties + null, new Resources(), null, probeProperties ); // Then @@ -189,7 +191,7 @@ void testServiceConfig_withoutProbe_doesNotSetProgressDeadlineAnnotation() { // When var generatedService = manifestGenerator.serviceConfig( deploymentName, Collections.emptyList(), Collections.emptyList(), Collections.emptyList(), imageName, - null, null, null, new Resources(), null, null + null, new Resources(), null, null ); // Then @@ -210,7 +212,7 @@ void testServiceConfig_withProbeProperties_setsStartupProbeOnContainer() { // When var generatedService = generatorWithRealConverter.serviceConfig( deploymentName, Collections.emptyList(), Collections.emptyList(), Collections.emptyList(), imageName, - null, null, null, new Resources(), null, probeProperties + null, new Resources(), null, probeProperties ); // Then: container has startup probe with expected path, port and timing diff --git a/src/test/java/com/epam/aidial/deployment/manager/web/controller/none/DeploymentControllerTest.java b/src/test/java/com/epam/aidial/deployment/manager/web/controller/none/DeploymentControllerTest.java index 785a8b5d..8ecc2ef5 100644 --- a/src/test/java/com/epam/aidial/deployment/manager/web/controller/none/DeploymentControllerTest.java +++ b/src/test/java/com/epam/aidial/deployment/manager/web/controller/none/DeploymentControllerTest.java @@ -34,6 +34,7 @@ import com.epam.aidial.deployment.manager.web.mapper.DeploymentDtoMapperImpl; import com.epam.aidial.deployment.manager.web.mapper.EnvVarValueDtoMapperImpl; import com.epam.aidial.deployment.manager.web.mapper.ProbePropertiesDtoMapperImpl; +import com.epam.aidial.deployment.manager.web.mapper.ScalingDtoMapperImpl; import com.fasterxml.jackson.core.type.TypeReference; import com.fasterxml.jackson.databind.ObjectMapper; import org.junit.jupiter.api.Test; @@ -76,6 +77,7 @@ DeploymentDtoMapperImpl.class, ProbePropertiesDtoMapperImpl.class, EnvVarValueDtoMapperImpl.class, + ScalingDtoMapperImpl.class, McpEndpointPathResolver.class }) class DeploymentControllerTest extends AbstractControllerNoneSecureTest { @@ -171,24 +173,6 @@ void testCreateDeployment() throws Exception { verify(deploymentService).createDeployment(any()); } - @Test - void testCreateDeployment_withMinScaleBiggerThanMaxScale() throws Exception { - var requestDtoJson = ResourceUtils.readResource("/mcp/deployment/create_deployment_request.json"); - var requestDto = objectMapper.readValue(requestDtoJson, CreateMcpDeploymentRequestDto.class); - - requestDto.setMinScale(5); - requestDto.setInitialScale(null); - requestDto.setMaxScale(2); - - var invalidRequestJson = objectMapper.writeValueAsString(requestDto); - - mockMvc.perform(post("/api/v1/deployments") - .contentType(MediaType.APPLICATION_JSON) - .content(invalidRequestJson)) - .andExpect(status().isBadRequest()) - .andExpect(jsonPath("$.message").value("minScale must be less than or equal to maxScale\n")); - } - @Test void testCreateDeployment_withCpuResourceExceedingMaxLimit() throws Exception { var requestDtoJson = ResourceUtils.readResource("/mcp/deployment/create_deployment_request.json"); diff --git a/src/test/java/com/epam/aidial/deployment/manager/web/controller/none/internal/DeploymentInternalControllerTest.java b/src/test/java/com/epam/aidial/deployment/manager/web/controller/none/internal/DeploymentInternalControllerTest.java index b77dca5f..1779679c 100644 --- a/src/test/java/com/epam/aidial/deployment/manager/web/controller/none/internal/DeploymentInternalControllerTest.java +++ b/src/test/java/com/epam/aidial/deployment/manager/web/controller/none/internal/DeploymentInternalControllerTest.java @@ -11,6 +11,7 @@ import com.epam.aidial.deployment.manager.web.mapper.DeploymentDtoMapperImpl; import com.epam.aidial.deployment.manager.web.mapper.EnvVarValueDtoMapperImpl; import com.epam.aidial.deployment.manager.web.mapper.ProbePropertiesDtoMapperImpl; +import com.epam.aidial.deployment.manager.web.mapper.ScalingDtoMapperImpl; import com.fasterxml.jackson.databind.ObjectMapper; import org.junit.jupiter.api.Test; import org.springframework.beans.factory.annotation.Autowired; @@ -36,6 +37,7 @@ DeploymentDtoMapperImpl.class, ProbePropertiesDtoMapperImpl.class, EnvVarValueDtoMapperImpl.class, + ScalingDtoMapperImpl.class, McpEndpointPathResolver.class }) class DeploymentInternalControllerTest extends AbstractControllerNoneSecureTest { diff --git a/src/test/java/com/epam/aidial/deployment/manager/web/validation/ScaleConfigurationValidatorTest.java b/src/test/java/com/epam/aidial/deployment/manager/web/validation/ScaleConfigurationValidatorTest.java deleted file mode 100644 index f10f93b9..00000000 --- a/src/test/java/com/epam/aidial/deployment/manager/web/validation/ScaleConfigurationValidatorTest.java +++ /dev/null @@ -1,114 +0,0 @@ -package com.epam.aidial.deployment.manager.web.validation; - -import com.epam.aidial.deployment.manager.web.dto.deployment.CreateDeploymentRequestDto; -import jakarta.validation.ConstraintValidatorContext; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertTrue; -import static org.mockito.ArgumentMatchers.anyString; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.verify; -import static org.mockito.Mockito.verifyNoInteractions; -import static org.mockito.Mockito.when; - -class ScaleConfigurationValidatorTest { - - private ScaleConfigurationValidator validator; - private ConstraintValidatorContext context; - - @BeforeEach - void setUp() { - validator = new ScaleConfigurationValidator(); - context = mock(ConstraintValidatorContext.class); - ConstraintValidatorContext.ConstraintViolationBuilder builder = - mock(ConstraintValidatorContext.ConstraintViolationBuilder.class); - when(context.buildConstraintViolationWithTemplate(anyString())).thenReturn(builder); - when(builder.addConstraintViolation()).thenReturn(context); - } - - @Test - void shouldBeValidWhenValueIsNull() { - assertTrue(validator.isValid(null, context)); - verifyNoInteractions(context); - } - - @Test - void shouldBeValidWhenAllScalesAreNull() { - var dto = new TestCreateDeploymentRequestDto(null, null, null); - - assertTrue(validator.isValid(dto, context)); - verifyNoInteractions(context); - } - - @Test - void shouldBeValidForValuesWithinRangeAndCorrectOrder() { - var dto = new TestCreateDeploymentRequestDto(0, 5, 10); - - assertTrue(validator.isValid(dto, context)); - } - - @Test - void shouldFailWhenMinScaleBelowRange() { - var dto = new TestCreateDeploymentRequestDto(-1, 5, 10); - - assertFalse(validator.isValid(dto, context)); - verify(context).buildConstraintViolationWithTemplate( - "minScale must be between 0 and 10"); - } - - @Test - void shouldFailWhenInitialScaleAboveRange() { - var dto = new TestCreateDeploymentRequestDto(0, 11, 10); - - assertFalse(validator.isValid(dto, context)); - verify(context).buildConstraintViolationWithTemplate( - "initialScale must be between 0 and 10"); - } - - @Test - void shouldFailWhenMaxScaleAboveRange() { - var dto = new TestCreateDeploymentRequestDto(0, 5, 11); - - assertFalse(validator.isValid(dto, context)); - verify(context).buildConstraintViolationWithTemplate( - "maxScale must be between 0 and 10"); - } - - @Test - void shouldFailWhenMinScaleGreaterThanInitialScale() { - var dto = new TestCreateDeploymentRequestDto(6, 5, 10); - - assertFalse(validator.isValid(dto, context)); - verify(context).buildConstraintViolationWithTemplate( - "minScale must be less than or equal to initialScale"); - } - - @Test - void shouldFailWhenInitialScaleGreaterThanMaxScale() { - var dto = new TestCreateDeploymentRequestDto(0, 6, 5); - - assertFalse(validator.isValid(dto, context)); - verify(context).buildConstraintViolationWithTemplate( - "initialScale must be less than or equal to maxScale"); - } - - @Test - void shouldFailWhenMinScaleGreaterThanMaxScale() { - var dto = new TestCreateDeploymentRequestDto(6, 7, 5); - - assertFalse(validator.isValid(dto, context)); - verify(context).buildConstraintViolationWithTemplate( - "minScale must be less than or equal to maxScale"); - } - - private static class TestCreateDeploymentRequestDto extends CreateDeploymentRequestDto { - TestCreateDeploymentRequestDto(Integer minScale, Integer initialScale, Integer maxScale) { - setMinScale(minScale); - setInitialScale(initialScale); - setMaxScale(maxScale); - } - } -} - diff --git a/src/test/java/com/epam/aidial/deployment/manager/web/validation/ScalingValidatorTest.java b/src/test/java/com/epam/aidial/deployment/manager/web/validation/ScalingValidatorTest.java index 53e29d67..d93e930d 100644 --- a/src/test/java/com/epam/aidial/deployment/manager/web/validation/ScalingValidatorTest.java +++ b/src/test/java/com/epam/aidial/deployment/manager/web/validation/ScalingValidatorTest.java @@ -1,6 +1,8 @@ package com.epam.aidial.deployment.manager.web.validation; import com.epam.aidial.deployment.manager.web.dto.ScalingDto; +import com.epam.aidial.deployment.manager.web.dto.ScalingStrategyDto; +import com.epam.aidial.deployment.manager.web.dto.ScalingStrategyTypeDto; import jakarta.validation.ConstraintValidatorContext; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -38,21 +40,35 @@ void shouldBeValidWhenMinLessThanMax() { ScalingDto dto = new ScalingDto(); dto.setMinReplicas(1); dto.setMaxReplicas(2); + dto.setStrategy(new ScalingStrategyDto(ScalingStrategyTypeDto.ACTIVE_REQUESTS, 50)); assertThat(validator.isValid(dto, context)).isTrue(); verifyNoInteractions(context); } @Test - void shouldBeValidWhenMinEqualsMax() { + void shouldBeValidWhenMinEqualsMaxAndStrategyNull() { ScalingDto dto = new ScalingDto(); dto.setMinReplicas(2); dto.setMaxReplicas(2); + dto.setStrategy(null); assertThat(validator.isValid(dto, context)).isTrue(); verifyNoInteractions(context); } + @Test + void shouldFailWhenMinEqualsMaxAndStrategySet() { + ScalingDto dto = new ScalingDto(); + dto.setMinReplicas(2); + dto.setMaxReplicas(2); + dto.setStrategy(new ScalingStrategyDto(ScalingStrategyTypeDto.ACTIVE_REQUESTS, 10)); + + assertThat(validator.isValid(dto, context)).isFalse(); + verify(context).buildConstraintViolationWithTemplate( + "strategy must be null when minReplicas equals maxReplicas"); + } + @Test void shouldFailWhenMinGreaterThanMax() { ScalingDto dto = new ScalingDto(); diff --git a/src/test/resources/config/expected_export_config_for_import.json b/src/test/resources/config/expected_export_config_for_import.json index b494ce59..18256b09 100644 --- a/src/test/resources/config/expected_export_config_for_import.json +++ b/src/test/resources/config/expected_export_config_for_import.json @@ -93,9 +93,12 @@ } ] }, - "initialScale": 1, - "minScale": 0, - "maxScale": 5, + "scaling": { + "minReplicas": 0, + "maxReplicas": 5, + "scaleToZeroDelaySeconds": 300, + "strategy": {"$type": "pending_requests", "threshold": 10} + }, "resources": {"limits": {}, "requests": {}}, "probeProperties": { "enabled": true, @@ -130,9 +133,11 @@ } ] }, - "initialScale": 1, - "minScale": 0, - "maxScale": 5, + "scaling": { + "minReplicas": 0, + "maxReplicas": 5, + "strategy": {"$type": "active_requests", "threshold": 50} + }, "resources": {"limits": {}, "requests": {}}, "containerPort": 5000, "allowedDomains": ["*"] @@ -157,9 +162,11 @@ } ] }, - "initialScale": 1, - "minScale": 0, - "maxScale": 5, + "scaling": { + "minReplicas": 0, + "maxReplicas": 5, + "scaleToZeroDelaySeconds": 600 + }, "resources": {"limits": {}, "requests": {}}, "probeProperties": { "enabled": true, @@ -180,9 +187,6 @@ "displayName": "NIM deployment export test", "description": "NIM deployment for import test", "metadata": {"envs": []}, - "initialScale": 1, - "minScale": 0, - "maxScale": 2, "resources": {"limits": {}, "requests": {}}, "probeProperties": { "enabled": true, @@ -206,9 +210,6 @@ "description": "Inference deployment for import test", "modelFormat": "huggingface", "metadata": {"envs": []}, - "initialScale": 1, - "minScale": 0, - "maxScale": 3, "resources": {"limits": {}, "requests": {}}, "probeProperties": { "enabled": true, diff --git a/src/test/resources/manifest/knative_service_with_scaling.json b/src/test/resources/manifest/knative_service_with_scaling.json index d3d7a174..0659920b 100644 --- a/src/test/resources/manifest/knative_service_with_scaling.json +++ b/src/test/resources/manifest/knative_service_with_scaling.json @@ -8,7 +8,7 @@ "template" : { "metadata" : { "annotations" : { - "autoscaling.knative.dev/initial-scale" : "0", + "autoscaling.knative.dev/initial-scale" : "1", "autoscaling.knative.dev/max-scale" : "10", "autoscaling.knative.dev/window" : "300s", "autoscaling.knative.dev/min-scale" : "0" diff --git a/src/test/resources/mcp/deployment/all_deployments.json b/src/test/resources/mcp/deployment/all_deployments.json index 88e671f4..40ae16b3 100644 --- a/src/test/resources/mcp/deployment/all_deployments.json +++ b/src/test/resources/mcp/deployment/all_deployments.json @@ -26,9 +26,6 @@ } ] }, - "initialScale": 1, - "minScale": 0, - "maxScale": 2, "resources": { "limits": {"cpu": "10"}, "requests": {"cpu": "5"} @@ -67,9 +64,6 @@ } ] }, - "initialScale": 1, - "minScale": 0, - "maxScale": 2, "resources": { "limits": {"cpu": "10"}, "requests": {"cpu": "5"} diff --git a/src/test/resources/mcp/deployment/all_deployments_response.json b/src/test/resources/mcp/deployment/all_deployments_response.json index df97209c..9ec9fcf0 100644 --- a/src/test/resources/mcp/deployment/all_deployments_response.json +++ b/src/test/resources/mcp/deployment/all_deployments_response.json @@ -8,9 +8,6 @@ "displayName": "example deployment 1", "description": "Description of example deployment 1", "author": "user1@test.com", - "initialScale": 1, - "minScale": 0, - "maxScale": 2, "resources": { "limits": {"cpu": "10"}, "requests": {"cpu": "5"} @@ -29,9 +26,6 @@ "displayName": "example deployment 2", "description": "Description of example deployment 2", "author": "user2@test.com", - "initialScale": 1, - "minScale": 0, - "maxScale": 2, "resources": { "limits": {"cpu": "10"}, "requests": {"cpu": "5"} diff --git a/src/test/resources/mcp/deployment/create_deployment_request.json b/src/test/resources/mcp/deployment/create_deployment_request.json index a1ab3696..de15c5de 100644 --- a/src/test/resources/mcp/deployment/create_deployment_request.json +++ b/src/test/resources/mcp/deployment/create_deployment_request.json @@ -39,9 +39,6 @@ "transport": "sse", "containerPort": 8082, "allowedDomains": ["first-test-domain.com", "second-test-domain.io"], - "initialScale": 1, - "minScale": 0, - "maxScale": 2, "resources": { "limits": { "limit1": "some-value-1", diff --git a/src/test/resources/mcp/deployment/create_deployment_response.json b/src/test/resources/mcp/deployment/create_deployment_response.json index 53c79107..7965b5db 100644 --- a/src/test/resources/mcp/deployment/create_deployment_response.json +++ b/src/test/resources/mcp/deployment/create_deployment_response.json @@ -39,9 +39,6 @@ "transport": "sse", "containerPort": 8082, "allowedDomains": ["first-test-domain.com", "second-test-domain.io"], - "initialScale": 1, - "minScale": 0, - "maxScale": 2, "status": "not_deployed", "resources": { "limits": { diff --git a/src/test/resources/mcp/deployment/deployment_by_id.json b/src/test/resources/mcp/deployment/deployment_by_id.json index 2954a82e..863adfd1 100644 --- a/src/test/resources/mcp/deployment/deployment_by_id.json +++ b/src/test/resources/mcp/deployment/deployment_by_id.json @@ -66,9 +66,6 @@ "request1": "some-rvalue-1" } }, - "initialScale": 1, - "minScale": 0, - "maxScale": 2, "status": "not_deployed", "createdAt": 1745996228431, "updatedAt": 1745996228431 diff --git a/src/test/resources/mcp/deployment/deployment_by_id_response.json b/src/test/resources/mcp/deployment/deployment_by_id_response.json index 781674cb..a81af622 100644 --- a/src/test/resources/mcp/deployment/deployment_by_id_response.json +++ b/src/test/resources/mcp/deployment/deployment_by_id_response.json @@ -38,9 +38,6 @@ }, "transport": "sse", "containerPort": 8082, - "initialScale": 1, - "minScale": 0, - "maxScale": 2, "status": "not_deployed", "createdAt": 1745996228431, "updatedAt": 1745996228431 diff --git a/src/test/resources/mcp/deployment/duplicate_deployment_response.json b/src/test/resources/mcp/deployment/duplicate_deployment_response.json index ec09fad8..e8602286 100644 --- a/src/test/resources/mcp/deployment/duplicate_deployment_response.json +++ b/src/test/resources/mcp/deployment/duplicate_deployment_response.json @@ -38,9 +38,6 @@ }, "transport": "sse", "containerPort": 8082, - "initialScale": 1, - "minScale": 0, - "maxScale": 2, "status": "not_deployed", "resources": { "limits": { diff --git a/src/test/resources/mcp/deployment/internal/deployment_by_id.json b/src/test/resources/mcp/deployment/internal/deployment_by_id.json index 411a9972..9fa6805e 100644 --- a/src/test/resources/mcp/deployment/internal/deployment_by_id.json +++ b/src/test/resources/mcp/deployment/internal/deployment_by_id.json @@ -22,9 +22,6 @@ "limits": {}, "requests": {} }, - "initialScale": 1, - "minScale": 0, - "maxScale": 2, "status": "running", "url": "https://example.com/example-deployment-1" } \ No newline at end of file diff --git a/src/test/resources/mcp/deployment/update_deployment_request.json b/src/test/resources/mcp/deployment/update_deployment_request.json index d5a89087..5cfcce98 100644 --- a/src/test/resources/mcp/deployment/update_deployment_request.json +++ b/src/test/resources/mcp/deployment/update_deployment_request.json @@ -27,8 +27,5 @@ } ] }, - "allowedDomains": ["first-test-domain.com", "second-test-domain.io"], - "initialScale": 1, - "minScale": 0, - "maxScale": 2 + "allowedDomains": ["first-test-domain.com", "second-test-domain.io"] } \ No newline at end of file diff --git a/src/test/resources/mcp/deployment/update_deployment_response.json b/src/test/resources/mcp/deployment/update_deployment_response.json index 95f8dbf7..237ad4dd 100644 --- a/src/test/resources/mcp/deployment/update_deployment_response.json +++ b/src/test/resources/mcp/deployment/update_deployment_response.json @@ -37,9 +37,6 @@ ] }, "allowedDomains": ["first-test-domain.com", "second-test-domain.io"], - "initialScale": 1, - "minScale": 0, - "maxScale": 2, "status": "not_deployed", "createdAt": 1745996228431, "updatedAt": 1745996228431