Skip to content
Merged
Show file tree
Hide file tree
Changes from 17 commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
7867b3b
feat: support scaling config for knative deployment types
oleksii-donets Feb 27, 2026
11e8e09
feat: improvements
oleksii-donets Feb 27, 2026
257d514
Merge branch 'development' into feature/161-scaling-knative-types
oleksii-donets Mar 2, 2026
0c139a3
Merge branch 'development' into feature/161-scaling-knative-types
oleksii-donets Mar 2, 2026
39854eb
refactor: flatten nested if-cases in scaling strategy handling
oleksii-donets Mar 2, 2026
e38597d
chore: fix PR comments
oleksii-donets Mar 2, 2026
b2eb3bf
chore: fix tests
oleksii-donets Mar 2, 2026
76fd8b4
Merge branch 'development' into feature/161-scaling-knative-types
oleksii-donets Mar 3, 2026
8d3a4aa
Merge branch 'development' into feature/161-scaling-knative-types
oleksii-donets Mar 4, 2026
24dc564
Merge branch 'development' into feature/161-scaling-knative-types
oleksii-donets Mar 5, 2026
153425d
refactor: move scaling from child deployment classes to base class
oleksii-donets Mar 5, 2026
0df2519
refactor: replace V1.47 knative scaling migration with base table mig…
oleksii-donets Mar 5, 2026
cddd188
fix: address PR review comments - remove redundant annotations and ad…
oleksii-donets Mar 6, 2026
bd5f638
Merge development into feature/161-scaling-knative-types
oleksii-donets Mar 6, 2026
a554e5c
chore: delete local claude config .claude/settings.local.json
oleksii-donets Mar 6, 2026
4f4be18
refactor: remove deprecated initialScale/minScale/maxScale fields
oleksii-donets Mar 6, 2026
6b7d40d
Merge branch 'development' into feature/161-scaling-knative-types
oleksii-donets Mar 6, 2026
d234f5d
refactor: drop deprecated initial_scale/min_scale/max_scale columns
oleksii-donets Mar 6, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -62,14 +62,8 @@ public class DeploymentEntity {
@JdbcTypeCode(SqlTypes.JSON)
private PersistenceDeploymentMetadata metadata;

@Column(name = "initial_scale")
private Integer initialScale;

@Column(name = "min_scale")
private Integer minScale;

@Column(name = "max_scale")
private Integer maxScale;
@JdbcTypeCode(SqlTypes.JSON)
private PersistenceScaling scaling;

@JdbcTypeCode(SqlTypes.JSON)
private PersistenceResources resources;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,4 @@ public class InferenceDeploymentEntity extends DeploymentEntity {

@JdbcTypeCode(SqlTypes.JSON)
private List<String> args;

@JdbcTypeCode(SqlTypes.JSON)
private PersistenceScaling scaling;
}
Original file line number Diff line number Diff line change
Expand Up @@ -118,15 +118,13 @@ public void updateEntityFromDomain(Deployment domain, DeploymentEntity existingE
existingEntity.setUrl(updatedEntity.getUrl());
existingEntity.setStatus(updatedEntity.getStatus());
existingEntity.setContainerPort(updatedEntity.getContainerPort());
existingEntity.setInitialScale(updatedEntity.getInitialScale());
existingEntity.setMaxScale(updatedEntity.getMaxScale());
existingEntity.setMinScale(updatedEntity.getMinScale());
existingEntity.setEnvs(updatedEntity.getEnvs());
existingEntity.setMetadata(updatedEntity.getMetadata());
existingEntity.setResources(updatedEntity.getResources());
existingEntity.setProbeProperties(updatedEntity.getProbeProperties());
existingEntity.setAuthor(updatedEntity.getAuthor());
existingEntity.setAllowedDomains(updatedEntity.getAllowedDomains());
existingEntity.setScaling(updatedEntity.getScaling());

if (existingEntity instanceof McpDeploymentEntity existingMcp
&& updatedEntity instanceof McpDeploymentEntity updatedMcp) {
Expand All @@ -146,7 +144,6 @@ public void updateEntityFromDomain(Deployment domain, DeploymentEntity existingE
existingInference.setSource(updatedInference.getSource());
existingInference.setCommand(updatedInference.getCommand());
existingInference.setArgs(updatedInference.getArgs());
existingInference.setScaling(updatedInference.getScaling());
}
}

Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
package com.epam.aidial.deployment.manager.model.deployment;

import lombok.AllArgsConstructor;
import lombok.NoArgsConstructor;
import lombok.experimental.SuperBuilder;

/**
 * Deployment model for the adapter deployment type.
 *
 * <p>Declares no members of its own; all state (description, envs, metadata, scaling,
 * resources, probe properties, status, ...) is inherited from {@link Deployment}.
 *
 * <p>{@code @SuperBuilder} provides the builder that cooperates with the parent's builder, and
 * {@code @NoArgsConstructor} provides the public no-argument constructor.
 * {@code @AllArgsConstructor} is deliberately not applied: with no fields declared here it
 * would generate a second zero-parameter constructor and clash with the one produced by
 * {@code @NoArgsConstructor}. This also keeps the class consistent with
 * {@code CreateAdapterDeployment}, which uses the same annotation pair.
 */
@SuperBuilder
@NoArgsConstructor
public class AdapterDeployment extends Deployment {
}
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
package com.epam.aidial.deployment.manager.model.deployment;

import lombok.NoArgsConstructor;
import lombok.experimental.SuperBuilder;

/**
 * Creation model for the adapter deployment type.
 *
 * <p>Declares no members of its own; every property (display name, description, metadata,
 * scaling, resources, probe properties, container port, ...) comes from
 * {@link CreateDeployment}.
 *
 * <p>{@code @SuperBuilder} generates a builder that chains to the parent's builder, and
 * {@code @NoArgsConstructor} generates the no-argument constructor.
 * NOTE(review): the no-argument constructor is presumably needed by a mapping or
 * deserialization framework - confirm against the callers.
 */
@SuperBuilder
@NoArgsConstructor
public class CreateAdapterDeployment extends CreateDeployment {
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import com.epam.aidial.deployment.manager.model.DeploymentMetadata;
import com.epam.aidial.deployment.manager.model.ImageType;
import com.epam.aidial.deployment.manager.model.Resources;
import com.epam.aidial.deployment.manager.model.Scaling;
import com.epam.aidial.deployment.manager.model.probe.ProbeProperties;
import lombok.AllArgsConstructor;
import lombok.Data;
Expand All @@ -25,9 +26,7 @@ public abstract class CreateDeployment {
private String displayName;
private String description;
private DeploymentMetadata metadata;
private Integer initialScale;
private Integer minScale;
private Integer maxScale;
private Scaling scaling;
private Resources resources;
private ProbeProperties probeProperties;
private Integer containerPort;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
package com.epam.aidial.deployment.manager.model.deployment;

import com.epam.aidial.deployment.manager.model.Scaling;
import lombok.AllArgsConstructor;
import lombok.EqualsAndHashCode;
import lombok.Getter;
Expand All @@ -26,6 +25,4 @@ public class CreateInferenceDeployment extends CreateDeployment {
private List<String> command;
@Nullable
private List<String> args;
@Nullable
private Scaling scaling;
}
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
package com.epam.aidial.deployment.manager.model.deployment;

import lombok.NoArgsConstructor;
import lombok.experimental.SuperBuilder;

/**
 * Creation model for the interceptor deployment type.
 *
 * <p>Declares no members of its own; every property (display name, description, metadata,
 * scaling, resources, probe properties, container port, ...) comes from
 * {@link CreateDeployment}.
 *
 * <p>{@code @SuperBuilder} generates a builder that chains to the parent's builder, and
 * {@code @NoArgsConstructor} generates the no-argument constructor.
 * NOTE(review): the no-argument constructor is presumably needed by a mapping or
 * deserialization framework - confirm against the callers.
 */
@SuperBuilder
@NoArgsConstructor
public class CreateInterceptorDeployment extends CreateDeployment {
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import com.epam.aidial.deployment.manager.model.EnvVar;
import com.epam.aidial.deployment.manager.model.ImageType;
import com.epam.aidial.deployment.manager.model.Resources;
import com.epam.aidial.deployment.manager.model.Scaling;
import com.epam.aidial.deployment.manager.model.probe.ProbeProperties;
import com.fasterxml.jackson.annotation.JsonSubTypes;
import com.fasterxml.jackson.annotation.JsonTypeInfo;
Expand Down Expand Up @@ -39,9 +40,7 @@ public abstract class Deployment {
private String description;
private List<EnvVar> envs;
private DeploymentMetadata metadata;
private Integer initialScale;
private Integer minScale;
private Integer maxScale;
private Scaling scaling;
private Resources resources;
private ProbeProperties probeProperties;
private DeploymentStatus status;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
package com.epam.aidial.deployment.manager.model.deployment;

import com.epam.aidial.deployment.manager.model.Scaling;
import lombok.AllArgsConstructor;
import lombok.EqualsAndHashCode;
import lombok.Getter;
Expand All @@ -26,6 +25,4 @@ public class InferenceDeployment extends Deployment {
private List<String> command;
@Nullable
private List<String> args;
@Nullable
private Scaling scaling;
}
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
package com.epam.aidial.deployment.manager.model.deployment;

import lombok.AllArgsConstructor;
import lombok.NoArgsConstructor;
import lombok.experimental.SuperBuilder;

/**
 * Deployment model for the interceptor deployment type.
 *
 * <p>Declares no members of its own; all state (description, envs, metadata, scaling,
 * resources, probe properties, status, ...) is inherited from {@link Deployment}.
 *
 * <p>{@code @SuperBuilder} provides the builder that cooperates with the parent's builder, and
 * {@code @NoArgsConstructor} provides the public no-argument constructor.
 * {@code @AllArgsConstructor} is deliberately not applied: with no fields declared here it
 * would generate a second zero-parameter constructor and clash with the one produced by
 * {@code @NoArgsConstructor}. This also keeps the class consistent with
 * {@code CreateInterceptorDeployment}, which uses the same annotation pair.
 */
@SuperBuilder
@NoArgsConstructor
public class InterceptorDeployment extends Deployment {
}
Original file line number Diff line number Diff line change
Expand Up @@ -415,9 +415,7 @@ private static boolean isApplicableForRollingUpdate(Deployment existing, Deploym
|| specializedUpdate
|| !Objects.equals(existing.getImageDefinitionId(), updated.getImageDefinitionId())
|| !Objects.equals(existing.getContainerPort(), updated.getContainerPort())
|| !Objects.equals(existing.getInitialScale(), updated.getInitialScale())
|| !Objects.equals(existing.getMinScale(), updated.getMinScale())
|| !Objects.equals(existing.getMaxScale(), updated.getMaxScale())
|| !Objects.equals(existing.getScaling(), updated.getScaling())
|| !Objects.equals(existing.getResources(), updated.getResources());
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -117,9 +117,7 @@ protected Service prepareServiceSpec(Deployment deployment) {
userDefinedSensitiveEnvs,
userDefinedSensitiveFileEnvs,
imageDefinition.getImageName(),
deployment.getInitialScale(),
deployment.getMinScale(),
deployment.getMaxScale(),
deployment.getScaling(),
deployment.getResources(),
containerPort,
deployment.getProbeProperties());
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package com.epam.aidial.deployment.manager.service.manifest;

import com.epam.aidial.deployment.manager.configuration.AppProperties;
import com.epam.aidial.deployment.manager.model.ScalingStrategyType;
import com.epam.aidial.deployment.manager.model.SensitiveEnvVar;
import com.epam.aidial.deployment.manager.model.SimpleEnvVar;
import com.epam.aidial.deployment.manager.utils.mapping.ListMapper;
Expand All @@ -14,6 +15,9 @@

public abstract class DeployableManifestGenerator extends BaseManifestGenerator {

protected static final List<ScalingStrategyType> SUPPORTED_SCALING_STRATEGIES =
List.of(ScalingStrategyType.ACTIVE_REQUESTS);

public DeployableManifestGenerator(AppProperties appconfig) {
super(appconfig);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -180,17 +180,8 @@ private void applyScaling(String name,
var annotations = config.get(InferenceMappers.SERVICE_METADATA_FIELD)
.get(InferenceMappers.METADATA_ANNOTATIONS_FIELD).data();
annotations.put("autoscaling.knative.dev/initial-scale", String.valueOf(initialScale));
log.trace("Set annotation autoscaling.knative.dev/initial-scale={} for model '{}'", initialScale, name);

if (scaling.getStrategy().getType() == ScalingStrategyType.ACTIVE_REQUESTS) {
predictor.setScaleMetric(Predictor.ScaleMetric.CONCURRENCY);
predictor.setScaleTarget(scaling.getStrategy().getThreshold());
log.trace("Applied strategy ACTIVE_REQUESTS: metric={}, target={} for model '{}'",
Predictor.ScaleMetric.CONCURRENCY, scaling.getStrategy().getThreshold(), name);
} else {
throw new IllegalArgumentException("Scaling strategy '%s' is not supported. Supported strategies: %s"
.formatted(scaling.getStrategy().getType(), List.of(ScalingStrategyType.ACTIVE_REQUESTS)));
}
log.trace("Set min-scale={}, max-scale={}, initial-scale={} for Inference deployment '{}'",
scaling.getMinReplicas(), scaling.getMaxReplicas(), initialScale, name);

if (scaling.getScaleToZeroDelaySeconds() != null) {
var delay = scaling.getScaleToZeroDelaySeconds();
Expand All @@ -199,6 +190,20 @@ private void applyScaling(String name,
log.trace("Set annotation autoscaling.knative.dev/scale-to-zero-pod-retention-period={} for model '{}'",
delayStr, name);
}

if (scaling.getStrategy() == null) {
return;
}

if (scaling.getStrategy().getType() == ScalingStrategyType.ACTIVE_REQUESTS) {
predictor.setScaleMetric(Predictor.ScaleMetric.CONCURRENCY);
predictor.setScaleTarget(scaling.getStrategy().getThreshold());
log.trace("Applied strategy ACTIVE_REQUESTS: target={} for model '{}'",
scaling.getStrategy().getThreshold(), name);
} else {
throw new IllegalArgumentException("Scaling strategy '%s' is not supported. Supported strategies: %s"
.formatted(scaling.getStrategy().getType(), SUPPORTED_SCALING_STRATEGIES));
}
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import com.epam.aidial.deployment.manager.configuration.AppProperties;
import com.epam.aidial.deployment.manager.configuration.logging.LogExecution;
import com.epam.aidial.deployment.manager.model.Resources;
import com.epam.aidial.deployment.manager.model.Scaling;
import com.epam.aidial.deployment.manager.model.ScalingStrategyType;
import com.epam.aidial.deployment.manager.model.SensitiveEnvVar;
import com.epam.aidial.deployment.manager.model.SensitiveFileEnvVar;
import com.epam.aidial.deployment.manager.model.SimpleEnvVar;
Expand Down Expand Up @@ -30,7 +32,6 @@
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;

import java.util.HashMap;
import java.util.List;

@Slf4j
Expand Down Expand Up @@ -58,9 +59,7 @@ public Service serviceConfig(
List<SensitiveEnvVar> sensitiveEnv,
List<SensitiveFileEnvVar> sensitiveFileEnvs,
String imageName,
@Nullable Integer initScale,
@Nullable Integer minScale,
@Nullable Integer maxScale,
@Nullable Scaling scaling,
Resources resources,
@Nullable Integer containerPort,
@Nullable ProbeProperties probeProperties
Expand All @@ -74,9 +73,9 @@ public Service serviceConfig(
var template = config.get(KnativeMappers.SERVICE_SPEC_FIELD)
.get(KnativeMappers.SERVICE_TEMPLATE_FIELD);

configureAnnotations(template, initScale, minScale, maxScale, probeProperties);

var revisionSpecChain = template.get(KnativeMappers.SERVICE_TEMPLATE_SPEC_FIELD);
applyScaling(name, scaling, template, revisionSpecChain);
applyProgressDeadline(probeProperties, template);
var containerChain = revisionSpecChain
.getList(KnativeMappers.TEMPLATE_CONTAINERS_FIELD, Mappers.CONTAINER_NAME)
.getOrDefault(appConfig.getKnativeServiceContainerConfig().getName(), appConfig::cloneKnativeServiceContainer);
Expand Down Expand Up @@ -158,34 +157,56 @@ private void addSecretVolumesAndMountsAndApplySensitiveFileEnv(
envVarChain.data().setValue(filePathToVolume);
}

private void configureAnnotations(
MappingChain<RevisionTemplateSpec> template,
@Nullable Integer initScale,
@Nullable Integer minScale,
@Nullable Integer maxScale,
@Nullable ProbeProperties probeProperties
) {
var templateMetadata = template.get(KnativeMappers.SERVICE_TEMPLATE_METADATA_FIELD).data();
var annotations = (templateMetadata.getAnnotations() != null)
? templateMetadata.getAnnotations()
: new HashMap<String, String>();
private void applyScaling(String name,
@Nullable Scaling scaling,
MappingChain<RevisionTemplateSpec> template,
MappingChain<RevisionSpec> revisionSpecChain) {
log.debug("Applying scaling for Knative deployment '{}': {}", name, scaling);
if (scaling == null) {
return;
}

if (initScale != null) {
annotations.put("autoscaling.knative.dev/initial-scale", String.valueOf(initScale));
var annotations = template.get(KnativeMappers.SERVICE_TEMPLATE_METADATA_FIELD)
.get(KnativeMappers.METADATA_ANNOTATIONS_FIELD).data();

var initialScale = Math.max(scaling.getMinReplicas(), 1);
annotations.put("autoscaling.knative.dev/initial-scale", String.valueOf(initialScale));
annotations.put("autoscaling.knative.dev/min-scale", String.valueOf(scaling.getMinReplicas()));
annotations.put("autoscaling.knative.dev/max-scale", String.valueOf(scaling.getMaxReplicas()));
log.trace("Set min-scale={}, max-scale={}, initial-scale={} for Knative deployment '{}'",
scaling.getMinReplicas(), scaling.getMaxReplicas(), initialScale, name);

if (scaling.getScaleToZeroDelaySeconds() != null) {
var delayStr = scaling.getScaleToZeroDelaySeconds() + "s";
annotations.put("autoscaling.knative.dev/scale-to-zero-pod-retention-period", delayStr);
log.trace("Set annotation autoscaling.knative.dev/scale-to-zero-pod-retention-period={} for Knative deployment '{}'",
delayStr, name);
}
if (minScale != null) {
annotations.put("autoscaling.knative.dev/min-scale", String.valueOf(minScale));

if (scaling.getStrategy() == null) {
return;
}
if (maxScale != null) {
annotations.put("autoscaling.knative.dev/max-scale", String.valueOf(maxScale));

if (scaling.getStrategy().getType() == ScalingStrategyType.ACTIVE_REQUESTS) {
var target = scaling.getStrategy().getThreshold();
revisionSpecChain.data().setContainerConcurrency((long) target);
annotations.put("autoscaling.knative.dev/target", String.valueOf(target));
log.trace("Applied strategy ACTIVE_REQUESTS: target={} for deployment '{}'",
scaling.getStrategy().getThreshold(), name);
} else {
throw new IllegalArgumentException("Scaling strategy '%s' is not supported. Supported strategies: %s"
.formatted(scaling.getStrategy().getType(), SUPPORTED_SCALING_STRATEGIES));
}
}

private void applyProgressDeadline(@Nullable ProbeProperties probeProperties,
MappingChain<RevisionTemplateSpec> template) {
var progressDeadline = progressDeadlineCalculator.compute(probeProperties);
if (progressDeadline != null) {
var annotations = template.get(KnativeMappers.SERVICE_TEMPLATE_METADATA_FIELD)
.get(KnativeMappers.METADATA_ANNOTATIONS_FIELD).data();
annotations.put("serving.knative.dev/progress-deadline", progressDeadline);
}

templateMetadata.setAnnotations(annotations);
}

private EnvVarSource buildKnativeSecretRef(SensitiveEnvVar env) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,9 @@
import lombok.experimental.UtilityClass;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

@UtilityClass
public class KnativeMappers {
Expand All @@ -35,6 +37,11 @@ public class KnativeMappers {
RevisionTemplateSpec::getMetadata,
RevisionTemplateSpec::setMetadata);

public static final FieldMapper<ObjectMeta, Map<String, String>> METADATA_ANNOTATIONS_FIELD = new FieldMapper<>(
HashMap::new,
ObjectMeta::getAnnotations,
ObjectMeta::setAnnotations);

public static final FieldMapper<RevisionTemplateSpec, RevisionSpec> SERVICE_TEMPLATE_SPEC_FIELD = new FieldMapper<>(
RevisionSpec::new,
RevisionTemplateSpec::getSpec,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,6 @@ public record DeploymentInfoDto(
@NotNull String displayName,
@Nullable String description,
@Nullable String author,
@Nullable Integer initialScale,
@Nullable Integer minScale,
@Nullable Integer maxScale,
@Nullable ResourcesDto resources,
@NotNull DeploymentStatusDto status,
@Nullable String url,
Expand Down
Loading
Loading