Skip to content
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
7867b3b
feat: support scaling config for knative deployment types
oleksii-donets Feb 27, 2026
11e8e09
feat: improvements
oleksii-donets Feb 27, 2026
257d514
Merge branch 'development' into feature/161-scaling-knative-types
oleksii-donets Mar 2, 2026
0c139a3
Merge branch 'development' into feature/161-scaling-knative-types
oleksii-donets Mar 2, 2026
39854eb
refactor: flatten nested if-cases in scaling strategy handling
oleksii-donets Mar 2, 2026
e38597d
chore: fix PR comments
oleksii-donets Mar 2, 2026
b2eb3bf
chore: fix tests
oleksii-donets Mar 2, 2026
76fd8b4
Merge branch 'development' into feature/161-scaling-knative-types
oleksii-donets Mar 3, 2026
8d3a4aa
Merge branch 'development' into feature/161-scaling-knative-types
oleksii-donets Mar 4, 2026
24dc564
Merge branch 'development' into feature/161-scaling-knative-types
oleksii-donets Mar 5, 2026
153425d
refactor: move scaling from child deployment classes to base class
oleksii-donets Mar 5, 2026
0df2519
refactor: replace V1.47 knative scaling migration with base table mig…
oleksii-donets Mar 5, 2026
cddd188
fix: address PR review comments - remove redundant annotations and ad…
oleksii-donets Mar 6, 2026
bd5f638
Merge development into feature/161-scaling-knative-types
oleksii-donets Mar 6, 2026
a554e5c
chore: delete local claude config .claude/settings.local.json
oleksii-donets Mar 6, 2026
4f4be18
refactor: remove deprecated initialScale/minScale/maxScale fields
oleksii-donets Mar 6, 2026
6b7d40d
Merge branch 'development' into feature/161-scaling-knative-types
oleksii-donets Mar 6, 2026
d234f5d
refactor: drop deprecated initial_scale/min_scale/max_scale columns
oleksii-donets Mar 6, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,23 @@
import jakarta.persistence.Entity;
import jakarta.persistence.EntityListeners;
import jakarta.persistence.Table;
import lombok.EqualsAndHashCode;
import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.Setter;
import org.hibernate.annotations.JdbcTypeCode;
import org.hibernate.type.SqlTypes;
import org.springframework.data.jpa.domain.support.AuditingEntityListener;

/**
 * Persistence entity for adapter deployments, mapped to the {@code adapter_deployment} table.
 *
 * <p>Extends {@link DeploymentEntity} with an adapter-specific scaling configuration.
 * Equality and hash code include the superclass state ({@code callSuper = true}).
 */
@Getter
@Setter
@NoArgsConstructor
@EqualsAndHashCode(callSuper = true)
@Entity
@EntityListeners(AuditingEntityListener.class)
@Table(name = "adapter_deployment")
public class AdapterDeploymentEntity extends DeploymentEntity {

    /** Scaling configuration, mapped with the JSON JDBC type. */
    @JdbcTypeCode(SqlTypes.JSON)
    private PersistenceScaling scaling;
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,23 @@
import jakarta.persistence.Entity;
import jakarta.persistence.EntityListeners;
import jakarta.persistence.Table;
import lombok.EqualsAndHashCode;
import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.Setter;
import org.hibernate.annotations.JdbcTypeCode;
import org.hibernate.type.SqlTypes;
import org.springframework.data.jpa.domain.support.AuditingEntityListener;

/**
 * Persistence entity for interceptor deployments, mapped to the {@code interceptor_deployment} table.
 *
 * <p>Extends {@link DeploymentEntity} with an interceptor-specific scaling configuration.
 * Equality and hash code include the superclass state ({@code callSuper = true}).
 */
@Getter
@Setter
@NoArgsConstructor
@EqualsAndHashCode(callSuper = true)
@Entity
@EntityListeners(AuditingEntityListener.class)
@Table(name = "interceptor_deployment")
public class InterceptorDeploymentEntity extends DeploymentEntity {

    /** Scaling configuration, mapped with the JSON JDBC type. */
    @JdbcTypeCode(SqlTypes.JSON)
    private PersistenceScaling scaling;
}
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package com.epam.aidial.deployment.manager.dao.entity.deployment;

import com.epam.aidial.deployment.manager.dao.entity.PersistenceMcpTransport;
import com.epam.aidial.deployment.manager.dao.entity.deployment.PersistenceScaling;
import jakarta.persistence.Column;
import jakarta.persistence.Entity;
import jakarta.persistence.EntityListeners;
Expand All @@ -11,6 +12,8 @@
import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.Setter;
import org.hibernate.annotations.JdbcTypeCode;
import org.hibernate.type.SqlTypes;
import org.springframework.data.jpa.domain.support.AuditingEntityListener;

@Entity
Expand All @@ -27,4 +30,7 @@ public class McpDeploymentEntity extends DeploymentEntity {

@Column(name = "mcp_endpoint_path")
private String mcpEndpointPath;

@JdbcTypeCode(SqlTypes.JSON)
private PersistenceScaling scaling;
}
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,17 @@ public void updateEntityFromDomain(Deployment domain, DeploymentEntity existingE
&& updatedEntity instanceof McpDeploymentEntity updatedMcp) {
existingMcp.setTransport(updatedMcp.getTransport());
existingMcp.setMcpEndpointPath(updatedMcp.getMcpEndpointPath());
existingMcp.setScaling(updatedMcp.getScaling());
}

if (existingEntity instanceof AdapterDeploymentEntity existingAdapter
&& updatedEntity instanceof AdapterDeploymentEntity updatedAdapter) {
existingAdapter.setScaling(updatedAdapter.getScaling());
}

if (existingEntity instanceof InterceptorDeploymentEntity existingInterceptor
&& updatedEntity instanceof InterceptorDeploymentEntity updatedInterceptor) {
existingInterceptor.setScaling(updatedInterceptor.getScaling());
}

if (existingEntity instanceof NimDeploymentEntity existingNim
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,19 @@
package com.epam.aidial.deployment.manager.model.deployment;

import com.epam.aidial.deployment.manager.model.Scaling;
import lombok.AllArgsConstructor;
import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.Setter;
import lombok.experimental.SuperBuilder;
import org.jetbrains.annotations.Nullable;

/**
 * Domain model of an adapter deployment.
 *
 * <p>Adds an optional {@link Scaling} configuration on top of the common {@link Deployment} state.
 */
@SuperBuilder
@NoArgsConstructor
@AllArgsConstructor
@Setter
@Getter
public class AdapterDeployment extends Deployment {

    /** Scaling configuration; may be {@code null} when none is set. */
    @Nullable private Scaling scaling;
}
Original file line number Diff line number Diff line change
@@ -1,7 +1,15 @@
package com.epam.aidial.deployment.manager.model.deployment;

import com.epam.aidial.deployment.manager.model.Scaling;
import lombok.Getter;
import lombok.Setter;
import lombok.experimental.SuperBuilder;
import org.jetbrains.annotations.Nullable;

/**
 * Creation model for an adapter deployment.
 *
 * <p>Adds an optional {@link Scaling} configuration on top of the common {@link CreateDeployment} state.
 */
@SuperBuilder
@Setter
@Getter
public class CreateAdapterDeployment extends CreateDeployment {

    /** Scaling configuration; may be {@code null} when none is set. */
    @Nullable private Scaling scaling;
}
Original file line number Diff line number Diff line change
@@ -1,7 +1,15 @@
package com.epam.aidial.deployment.manager.model.deployment;

import com.epam.aidial.deployment.manager.model.Scaling;
import lombok.Getter;
import lombok.Setter;
import lombok.experimental.SuperBuilder;
import org.jetbrains.annotations.Nullable;

/**
 * Creation model for an interceptor deployment.
 *
 * <p>Adds an optional {@link Scaling} configuration on top of the common {@link CreateDeployment} state.
 */
@SuperBuilder
@Setter
@Getter
public class CreateInterceptorDeployment extends CreateDeployment {

    /** Scaling configuration; may be {@code null} when none is set. */
    @Nullable private Scaling scaling;
}
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
package com.epam.aidial.deployment.manager.model.deployment;

import com.epam.aidial.deployment.manager.model.McpTransport;
import com.epam.aidial.deployment.manager.model.Scaling;
import lombok.AllArgsConstructor;
import lombok.EqualsAndHashCode;
import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.Setter;
import lombok.ToString;
import lombok.experimental.SuperBuilder;
import org.jetbrains.annotations.Nullable;

@Getter
@Setter
Expand All @@ -19,4 +21,6 @@
public class CreateMcpDeployment extends CreateDeployment {
private McpTransport transport;
private String mcpEndpointPath;
@Nullable
private Scaling scaling;
}
Original file line number Diff line number Diff line change
@@ -1,9 +1,19 @@
package com.epam.aidial.deployment.manager.model.deployment;

import com.epam.aidial.deployment.manager.model.Scaling;
import lombok.AllArgsConstructor;
import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.Setter;
import lombok.experimental.SuperBuilder;
import org.jetbrains.annotations.Nullable;

/**
 * Domain model of an interceptor deployment.
 *
 * <p>Adds an optional {@link Scaling} configuration on top of the common {@link Deployment} state.
 */
@SuperBuilder
@NoArgsConstructor
@AllArgsConstructor
@Setter
@Getter
public class InterceptorDeployment extends Deployment {

    /** Scaling configuration; may be {@code null} when none is set. */
    @Nullable private Scaling scaling;
}
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
package com.epam.aidial.deployment.manager.model.deployment;

import com.epam.aidial.deployment.manager.model.McpTransport;
import com.epam.aidial.deployment.manager.model.Scaling;
import lombok.AllArgsConstructor;
import lombok.EqualsAndHashCode;
import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.Setter;
import lombok.ToString;
import lombok.experimental.SuperBuilder;
import org.jetbrains.annotations.Nullable;

@Getter
@Setter
Expand All @@ -19,4 +21,6 @@
public class McpDeployment extends Deployment {
private McpTransport transport;
private String mcpEndpointPath;
@Nullable
private Scaling scaling;
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,14 @@
import com.epam.aidial.deployment.manager.model.InterceptorImageDefinition;
import com.epam.aidial.deployment.manager.model.McpImageDefinition;
import com.epam.aidial.deployment.manager.model.PodInfo;
import com.epam.aidial.deployment.manager.model.Scaling;
import com.epam.aidial.deployment.manager.model.SimpleEnvVar;
import com.epam.aidial.deployment.manager.model.deployment.AdapterDeployment;
import com.epam.aidial.deployment.manager.model.deployment.CreateDeployment;
import com.epam.aidial.deployment.manager.model.deployment.Deployment;
import com.epam.aidial.deployment.manager.model.deployment.InferenceDeployment;
import com.epam.aidial.deployment.manager.model.deployment.InterceptorDeployment;
import com.epam.aidial.deployment.manager.model.deployment.McpDeployment;
import com.epam.aidial.deployment.manager.model.deployment.NimDeployment;
import com.epam.aidial.deployment.manager.service.ImageDefinitionService;
import com.epam.aidial.deployment.manager.service.security.SecurityClaimsExtractor;
Expand Down Expand Up @@ -418,9 +422,20 @@ private static boolean isApplicableForRollingUpdate(Deployment existing, Deploym
|| !Objects.equals(existing.getInitialScale(), updated.getInitialScale())
|| !Objects.equals(existing.getMinScale(), updated.getMinScale())
|| !Objects.equals(existing.getMaxScale(), updated.getMaxScale())
|| !Objects.equals(getScaling(existing), getScaling(updated))
|| !Objects.equals(existing.getResources(), updated.getResources());
}

/**
 * Resolves the scaling configuration of a deployment from its concrete type.
 *
 * @param deployment deployment to inspect
 * @return the scaling of an MCP, adapter, interceptor or inference deployment;
 *         {@code null} for any other deployment type
 */
private static Scaling getScaling(Deployment deployment) {
    final Scaling resolved = switch (deployment) {
        case McpDeployment mcpDeployment -> mcpDeployment.getScaling();
        case AdapterDeployment adapterDeployment -> adapterDeployment.getScaling();
        case InterceptorDeployment interceptorDeployment -> interceptorDeployment.getScaling();
        case InferenceDeployment inferenceDeployment -> inferenceDeployment.getScaling();
        // Remaining deployment types expose no scaling through this helper.
        default -> null;
    };
    return resolved;
}

private static boolean isApplicableForCiliumNetworkPolicyUpdate(Deployment existing, Deployment updated) {
return !CollectionUtils.isEqualCollection(existing.getAllowedDomains(), updated.getAllowedDomains())
|| !Objects.equals(existing.getContainerPort(), updated.getContainerPort());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import com.epam.aidial.deployment.manager.kubernetes.KubernetesConditionConstants;
import com.epam.aidial.deployment.manager.kubernetes.knative.K8sKnativeClient;
import com.epam.aidial.deployment.manager.model.DeploymentStatus;
import com.epam.aidial.deployment.manager.model.Scaling;
import com.epam.aidial.deployment.manager.model.SensitiveEnvVar;
import com.epam.aidial.deployment.manager.model.SensitiveFileEnvVar;
import com.epam.aidial.deployment.manager.model.SimpleEnvVar;
Expand Down Expand Up @@ -117,14 +118,25 @@ protected Service prepareServiceSpec(Deployment deployment) {
userDefinedSensitiveEnvs,
userDefinedSensitiveFileEnvs,
imageDefinition.getImageName(),
deployment.getInitialScale(),
deployment.getMinScale(),
deployment.getMaxScale(),
getScaling(deployment),
deployment.getResources(),
containerPort,
deployment.getProbeProperties());
}

/**
 * Resolves the scaling configuration of a deployment from its concrete type.
 *
 * @param deployment deployment to inspect
 * @return the scaling of an MCP, interceptor or adapter deployment;
 *         {@code null} for any other deployment type
 */
private Scaling getScaling(Deployment deployment) {
    Scaling resolved = null;
    if (deployment instanceof McpDeployment mcp) {
        resolved = mcp.getScaling();
    } else if (deployment instanceof InterceptorDeployment interceptor) {
        resolved = interceptor.getScaling();
    } else if (deployment instanceof AdapterDeployment adapter) {
        resolved = adapter.getScaling();
    }
    return resolved;
}

@Override
protected void createService(String namespace, Service service) {
k8sKnativeClient.createService(namespace, service);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package com.epam.aidial.deployment.manager.service.manifest;

import com.epam.aidial.deployment.manager.configuration.AppProperties;
import com.epam.aidial.deployment.manager.model.ScalingStrategyType;
import com.epam.aidial.deployment.manager.model.SensitiveEnvVar;
import com.epam.aidial.deployment.manager.model.SimpleEnvVar;
import com.epam.aidial.deployment.manager.utils.mapping.ListMapper;
Expand All @@ -14,6 +15,9 @@

public abstract class DeployableManifestGenerator extends BaseManifestGenerator {

/**
 * Scaling strategy types listed as supported; currently only
 * {@link ScalingStrategyType#ACTIVE_REQUESTS}. Declared {@code protected} so
 * subclasses can reference it.
 */
protected static final List<ScalingStrategyType> SUPPORTED_SCALING_STRATEGIES =
List.of(ScalingStrategyType.ACTIVE_REQUESTS);

/**
 * Creates the generator, passing the application properties to the
 * {@link BaseManifestGenerator} constructor.
 *
 * @param appconfig application properties handed to the superclass
 */
public DeployableManifestGenerator(AppProperties appconfig) {
super(appconfig);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ public class InferenceManifestGenerator extends DeployableManifestGenerator {
private final KserveProbeConverter kserveProbeConverter;

public InferenceManifestGenerator(AppProperties appconfig,
KserveProbeConverter kserveProbeConverter) {
KserveProbeConverter kserveProbeConverter) {
super(appconfig);
this.kserveProbeConverter = kserveProbeConverter;
}
Expand Down Expand Up @@ -166,17 +166,8 @@ private void applyScaling(String name,
var annotations = config.get(InferenceMappers.SERVICE_METADATA_FIELD)
.get(InferenceMappers.METADATA_ANNOTATIONS_FIELD).data();
annotations.put("autoscaling.knative.dev/initial-scale", String.valueOf(initialScale));
log.trace("Set annotation autoscaling.knative.dev/initial-scale={} for model '{}'", initialScale, name);

if (scaling.getStrategy().getType() == ScalingStrategyType.ACTIVE_REQUESTS) {
predictor.setScaleMetric(Predictor.ScaleMetric.CONCURRENCY);
predictor.setScaleTarget(scaling.getStrategy().getThreshold());
log.trace("Applied strategy ACTIVE_REQUESTS: metric={}, target={} for model '{}'",
Predictor.ScaleMetric.CONCURRENCY, scaling.getStrategy().getThreshold(), name);
} else {
throw new IllegalArgumentException("Scaling strategy '%s' is not supported. Supported strategies: %s"
.formatted(scaling.getStrategy().getType(), List.of(ScalingStrategyType.ACTIVE_REQUESTS)));
}
log.trace("Set min-scale={}, max-scale={}, initial-scale={} for Inference deployment '{}'",
scaling.getMinReplicas(), scaling.getMaxReplicas(), initialScale, name);

if (scaling.getScaleToZeroDelaySeconds() != null) {
var delay = scaling.getScaleToZeroDelaySeconds();
Expand All @@ -185,6 +176,20 @@ private void applyScaling(String name,
log.trace("Set annotation autoscaling.knative.dev/scale-to-zero-pod-retention-period={} for model '{}'",
delayStr, name);
}

if (scaling.getStrategy() == null) {
return;
}

if (scaling.getStrategy().getType() == ScalingStrategyType.ACTIVE_REQUESTS) {
predictor.setScaleMetric(Predictor.ScaleMetric.CONCURRENCY);
predictor.setScaleTarget(scaling.getStrategy().getThreshold());
log.trace("Applied strategy ACTIVE_REQUESTS: target={} for model '{}'",
scaling.getStrategy().getThreshold(), name);
} else {
throw new IllegalArgumentException("Scaling strategy '%s' is not supported. Supported strategies: %s"
.formatted(scaling.getStrategy().getType(), SUPPORTED_SCALING_STRATEGIES));
}
}

}
Loading
Loading