Skip to content

Commit a2ccb83

Browse files
committed
Updated deployment
1 parent 9530323 commit a2ccb83

File tree

3 files changed

+60
-9
lines changed

3 files changed

+60
-9
lines changed

.github/workflows/deploy-azure.yml

Lines changed: 36 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -154,25 +154,54 @@ jobs:
154154
kubectl get pods
155155
kubectl get services
156156
157-
echo "Waiting for Memgraph pod to be ready (may take up to 5 minutes)..."
158-
if ! kubectl wait --for=condition=ready pod -l app=memgraph --timeout=5m; then
157+
echo "Waiting for Memgraph pod to be ready (may take up to 10 minutes)..."
158+
# First, find any existing Memgraph pods that are in the Failed phase or stuck in CrashLoopBackOff, and delete them
159+
FAILED_PODS=$(kubectl get pods -l app=memgraph -o jsonpath="{.items[?(@.status.phase=='Failed' || @.status.containerStatuses[0].state.waiting.reason=='CrashLoopBackOff')].metadata.name}")
160+
if [[ -n "$FAILED_PODS" ]]; then
161+
echo "Found failed pods, cleaning up before proceeding:"
162+
echo "$FAILED_PODS" | tr ' ' '\n' | while read pod; do
163+
echo "Deleting failed pod: $pod"
164+
kubectl delete pod $pod
165+
done
166+
echo "Waiting for cleanup to complete..."
167+
sleep 30
168+
fi
169+
170+
# Wait for the pod to become ready, using the increased 10-minute timeout
171+
if ! kubectl wait --for=condition=ready pod -l app=memgraph --timeout=10m; then
159172
echo "Error: Memgraph pod did not become ready within the timeout period."
160173
echo "Checking Memgraph pod logs:"
161174
POD_NAME=$(kubectl get pods -l app=memgraph -o jsonpath="{.items[0].metadata.name}")
162-
kubectl logs $POD_NAME
175+
176+
echo "Pod details:"
163177
kubectl describe pod $POD_NAME
164-
echo "Deployment verification failed!"
178+
179+
echo "Pod logs:"
180+
kubectl logs $POD_NAME --tail=100
181+
182+
echo "Checking node resource utilization:"
183+
kubectl describe nodes
184+
185+
echo "Deployment verification failed! Please check the Kubernetes cluster's resources and capacity."
165186
exit 1
166187
fi
167188
168189
echo "Memgraph deployment successful!"
169190
170191
# Wait for the LoadBalancer service to get an external IP
171192
echo "Waiting for LoadBalancer to get external IP..."
172-
for i in {1..30}; do
193+
for i in {1..45}; do
173194
EXTERNAL_IP=$(kubectl get service memgraph -o jsonpath='{.status.loadBalancer.ingress[0].ip}')
174195
if [[ -n "$EXTERNAL_IP" ]]; then
175196
echo "Memgraph is accessible at: ${EXTERNAL_IP}:7687"
197+
198+
# Add a simple connectivity check
199+
echo "Performing basic connectivity test..."
200+
if nc -z -w 5 ${EXTERNAL_IP} 7687; then
201+
echo "Successfully connected to Memgraph on port 7687!"
202+
else
203+
echo "Warning: Could not connect to Memgraph on port 7687. The service may not be fully ready yet."
204+
fi
176205
break
177206
fi
178207
echo "Waiting for external IP (attempt $i)..."
@@ -181,4 +210,6 @@ jobs:
181210
182211
if [[ -z "$EXTERNAL_IP" ]]; then
183212
echo "Warning: Could not obtain external IP for Memgraph service within timeout."
213+
echo "Checking LoadBalancer service status:"
214+
kubectl describe service memgraph
184215
fi

infra/azure/main.tf

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,12 @@ resource "azurerm_kubernetes_cluster" "ai_agent" {
6262
default_node_pool {
6363
name = "default"
6464
node_count = 1
65-
vm_size = "Standard_D2_v2"
65+
vm_size = "Standard_D4_v2" # Increased from D2 to D4 for more resources
66+
67+
# Enable auto-scaling for better resource management
68+
enable_auto_scaling = true
69+
min_count = 1
70+
max_count = 3
6671
}
6772

6873
identity {

infra/k8s/memgraph.yaml

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,21 @@ spec:
2828
command: ["sysctl", "-w", "vm.max_map_count=262144"]
2929
securityContext:
3030
privileged: true
31+
# Add a soft (preferred, not required) node affinity so the pod is scheduled on Standard_D4_v2 nodes, which have sufficient resources, when available
32+
affinity:
33+
nodeAffinity:
34+
preferredDuringSchedulingIgnoredDuringExecution:
35+
- weight: 100
36+
preference:
37+
matchExpressions:
38+
- key: beta.kubernetes.io/instance-type
39+
operator: In
40+
values:
41+
- Standard_D4_v2
42+
tolerations:
43+
- key: "node.kubernetes.io/memory-pressure"
44+
operator: "Exists"
45+
effect: "NoSchedule"
3146
containers:
3247
- name: memgraph
3348
image: memgraph/memgraph-mage:latest
@@ -56,11 +71,11 @@ spec:
5671
mountPath: /etc/memgraph
5772
resources:
5873
requests:
74+
memory: "512Mi"
75+
cpu: "250m"
76+
limits:
5977
memory: "1Gi"
6078
cpu: "500m"
61-
limits:
62-
memory: "2Gi"
63-
cpu: "1"
6479
livenessProbe:
6580
httpGet:
6681
path: /api/v1/storage/status

0 commit comments

Comments
 (0)