Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions applications/osmo/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ id_rsa*
.env
.env.*
!.env.example
osmo-deploy.env

# IDE
.idea/
Expand Down
4 changes: 2 additions & 2 deletions applications/osmo/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ Deploy [NVIDIA OSMO](https://nvidia.github.io/OSMO/main/user_guide/index.html) o
| No External DNS service | Manual DNS configuration required | Not addressed |
| No managed SSL/TLS service | Manual certificate management | Not addressed |
| No public Load Balancer (ALB/NLB) | Use port-forwarding or WireGuard VPN for access | Workaround in place |
| IDP integration for Nebius | Using OSMO dev auth mode; Keycloak available but not integrated | TBD |
| IDP integration for Nebius | Keycloak + Nebius SSO (OIDC) supported; see [Authentication](deploy/example/002-setup/AUTHENTICATION.md) | Done |
| Nebius Observability Stack integration | Using self-deployed Prometheus/Grafana/Loki | TODO |
| Single cluster for Control Plane + Backend | Using 1 MK8s cluster for both; production separation TBD | Discuss with Nebius |

Expand Down Expand Up @@ -234,7 +234,7 @@ See [Terraform README](deploy/001-iac/README.md) for configuration options, and

The script automatically:
- Starts a port-forward to OSMO service
- Logs in using dev method (since Keycloak auth is disabled)
- Logs in via Keycloak (or dev method if Keycloak is disabled)
- Creates a service token for the backend operator
- Deploys the backend operator
- Cleans up the port-forward
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,17 @@ echo " OSMO Secrets Initialization"
echo "========================================"
echo ""

# Standalone bootstrap: when NEBIUS_PROJECT_ID is unset or empty, load
# nebius-env-init.sh from the directory containing this script, so that
# ./secrets-init.sh can be run directly without sourcing the env file first.
# If that file is not present next to the script, nothing is sourced.
[[ -n "${NEBIUS_PROJECT_ID:-}" ]] || {
  # Resolve this script's own directory (falls back to $0 when BASH_SOURCE is unavailable).
  SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)"
  if [[ -f "${SCRIPT_DIR}/nebius-env-init.sh" ]]; then
    printf '%s\n' "Sourcing ${SCRIPT_DIR}/nebius-env-init.sh (NEBIUS_PROJECT_ID not set)..."
    # shellcheck source=./nebius-env-init.sh
    source "${SCRIPT_DIR}/nebius-env-init.sh"
    printf '\n'
  fi
}

# -----------------------------------------------------------------------------
# Helper Functions
# -----------------------------------------------------------------------------
Expand Down
7 changes: 4 additions & 3 deletions applications/osmo/deploy/example/001-iac/outputs.tf
Original file line number Diff line number Diff line change
Expand Up @@ -167,9 +167,10 @@ output "next_steps" {
./01-deploy-gpu-infrastructure.sh
./02-deploy-observability.sh
./03-deploy-nginx-ingress.sh
./04-enable-tls.sh <hostname>
./05-deploy-osmo-control-plane.sh
./06-deploy-osmo-backend.sh
./03b-enable-tls.sh <hostname> # optional, recommended; omit <hostname> to use OSMO_INGRESS_HOSTNAME
./04-deploy-osmo-control-plane.sh
./05-deploy-osmo-backend.sh
./06-configure-storage.sh
${var.enable_managed_postgresql ? "PostgreSQL Connection (Managed):\n Host: ${module.platform.postgresql_host}\n Port: ${module.platform.postgresql_port}\n Database: ${module.platform.postgresql_database}\n Username: ${module.platform.postgresql_username}" : "PostgreSQL: Using in-cluster PostgreSQL (deployed via Helm in 04-deploy-osmo-control-plane.sh)"}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
# We do not use the quick-start umbrella chart here (Nebius uses managed DB, etc.),
# so we install the controller explicitly. Not a duplicate of OSMO—same upstream chart.
#
# Run before 05-deploy-osmo-control-plane.sh.
# Run before 04-deploy-osmo-control-plane.sh (and optionally 03b-enable-tls.sh).
# See: https://kubernetes.github.io/ingress-nginx/deploy/

set -e
Expand Down Expand Up @@ -84,6 +84,7 @@ echo "========================================"
log_success "NGINX Ingress deployment complete"
echo "========================================"
echo ""
echo "Next: run 04-enable-tls.sh <hostname> (optional, recommended)"
echo " then 05-deploy-osmo-control-plane.sh"
echo "Next: run 03b-enable-tls.sh <hostname> (optional, recommended)"
echo " - If you omit <hostname>, it will use OSMO_INGRESS_HOSTNAME."
echo "Then: run 04-deploy-osmo-control-plane.sh"
echo ""
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
# 1) cert-manager (default) — automated HTTP-01 challenges via in-cluster cert-manager
# 2) certbot — interactive manual DNS-01 challenges via local certbot binary
#
# Set OSMO_TLS_MODE=certbot or OSMO_TLS_MODE=cert-manager to skip the prompt.
# Default: OSMO_TLS_MODE=cert-manager (no prompt). Set to certbot to use certbot instead.
#
# Can be run at two points in the deployment flow:
#
Expand All @@ -24,10 +24,12 @@
# (A record for cert-manager/HTTP-01; TXT record for certbot/DNS-01)
#
# Usage:
# ./04-enable-tls.sh [hostname]
# ./03b-enable-tls.sh [hostname]
# - If [hostname] is omitted, the script uses OSMO_INGRESS_HOSTNAME.
#
# Optional environment variables:
# OSMO_TLS_MODE - "cert-manager" or "certbot" (skips prompt)
# OSMO_TLS_MODE - "cert-manager" (default) or "certbot"
# OSMO_TLS_SKIP_DNS_CONFIRM - set to "true" to skip DNS confirmation prompt (auto-set for nip.io hostnames)
# OSMO_TLS_EMAIL - Email for Let's Encrypt (default: noreply@<domain>)
# OSMO_TLS_SECRET_NAME - K8s Secret name for certificate (default: osmo-tls)
# LETSENCRYPT_EMAIL - Alias for OSMO_TLS_EMAIL (certbot path)
Expand Down Expand Up @@ -75,14 +77,14 @@ check_kubectl || exit 1
log_info "Hostname: ${MAIN_HOSTNAME}"
log_info "TLS secret: ${TLS_SECRET}"

# Keycloak auth subdomain support
# Keycloak auth subdomain support (auth-<main> e.g. auth-osmo.89-169-122-246.nip.io for nip.io)
KC_TLS_SECRET="${KEYCLOAK_TLS_SECRET_NAME:-osmo-tls-auth}"
AUTH_HOSTNAME=""
if [[ "${DEPLOY_KEYCLOAK:-false}" == "true" ]]; then
if [[ -n "${KEYCLOAK_HOSTNAME:-}" ]]; then
AUTH_HOSTNAME="${KEYCLOAK_HOSTNAME}"
else
AUTH_HOSTNAME="auth.${MAIN_HOSTNAME}"
AUTH_HOSTNAME="auth-${MAIN_HOSTNAME}"
fi
log_info "Keycloak auth hostname: ${AUTH_HOSTNAME}"
log_info "Keycloak TLS secret: ${KC_TLS_SECRET}"
Expand Down Expand Up @@ -148,7 +150,13 @@ else
echo "Let's Encrypt HTTP-01 challenges require DNS to resolve to the LoadBalancer."
fi
echo ""
read_prompt_var "Press Enter once DNS records are configured (or type 'skip' to skip DNS check)" DNS_CONFIRM ""
# Decide whether to ask the operator to confirm that DNS records are in place.
# The prompt is skipped (DNS_CONFIRM="skip") when:
#   - the hostname is under nip.io (resolves automatically), or
#   - OSMO_TLS_SKIP_DNS_CONFIRM is set to "true".
# nip.io is matched as a domain suffix (".nip.io"), not as a substring, so that
# unrelated hosts such as "osmo.snip.io" or "nip.io.example.com" still get the
# confirmation prompt instead of silently skipping the DNS check.
if [[ "${MAIN_HOSTNAME}" == *.nip.io || "${OSMO_TLS_SKIP_DNS_CONFIRM:-false}" == "true" ]]; then
  DNS_CONFIRM="skip"
  log_info "Skipping DNS confirmation (nip.io or OSMO_TLS_SKIP_DNS_CONFIRM=true)"
else
  # Interactive path: the operator presses Enter to continue, or types 'skip'.
  read_prompt_var "Press Enter once DNS records are configured (or type 'skip' to skip DNS check)" DNS_CONFIRM ""
fi

# Verify DNS resolves to the LoadBalancer IP
if [[ "$DNS_CONFIRM" != "skip" ]]; then
Expand Down Expand Up @@ -595,42 +603,52 @@ EOF

# -------------------------------------------------------------------------
# Restore OSMO Ingress resources with TLS (Mode B)
# Restore osmo-ui first so it is oldest for host+path / and wins in NGINX merge (avoids 404 on /).
# -------------------------------------------------------------------------
if [[ "$OSMO_DEPLOYED" == "true" && "$CERT_READY" == "True" ]]; then
log_info "Restoring OSMO Ingress resources with TLS..."

# Order: osmo-ui first so it is oldest for host osmo.* and wins path / (avoids 404 on /)
RESTORE_ORDER=()
for ing_name in "${REMOVED_INGRESSES[@]}"; do
if [[ "$ing_name" == "osmo-ui" ]]; then
RESTORE_ORDER=("$ing_name" "${RESTORE_ORDER[@]}")
else
RESTORE_ORDER+=("$ing_name")
fi
done
for ing_name in "${RESTORE_ORDER[@]}"; do
backup_file="/tmp/osmo-tls-backup/${ing_name}.yaml"
[[ ! -f "$backup_file" ]] && continue

# Determine which hostname/secret this ingress should use
# Determine which hostname/secret this ingress should use (keycloak = auth host, rest = main host)
local_host=$(yq -r '.spec.rules[0].host // ""' "$backup_file" 2>/dev/null || \
python3 -c "import yaml,sys; d=yaml.safe_load(open('$backup_file')); print(d.get('spec',{}).get('rules',[{}])[0].get('host',''))" 2>/dev/null || echo "")
tls_secret_name="${TLS_SECRET}"
tls_host="${MAIN_HOSTNAME}"
if [[ "$local_host" == *"auth."* && -n "$AUTH_HOSTNAME" && "$AUTH_CERT_READY" == "True" ]]; then
if [[ ( "$ing_name" == "keycloak" || "$local_host" == *"auth"* ) && -n "$AUTH_HOSTNAME" && "$AUTH_CERT_READY" == "True" ]]; then
tls_secret_name="${KC_TLS_SECRET}"
tls_host="${AUTH_HOSTNAME}"
fi

# Re-apply the backup, then patch in TLS (no cert-manager annotation)
# Re-apply the backup, then patch host + TLS so Ingress matches intended hostname (not stale .local from backup)
kubectl apply -f "$backup_file" 2>/dev/null || true
kubectl patch ingress "$ing_name" -n "${OSMO_NS}" --type=merge -p "$(cat <<PATCH
{
"metadata": {
"annotations": {
"nginx.ingress.kubernetes.io/ssl-redirect": "true"
}
},
"spec": {
"tls": [{
"hosts": ["${tls_host}"],
"secretName": "${tls_secret_name}"
}]
}
}
PATCH
)" && log_success " ${ing_name}: restored with TLS" || log_warning " ${ing_name}: failed to restore"
if kubectl patch ingress "$ing_name" -n "${OSMO_NS}" --type=merge -p '{"metadata":{"annotations":{"nginx.ingress.kubernetes.io/ssl-redirect":"true"}},"spec":{"tls":[{"hosts":["'"${tls_host}"'"],"secretName":"'"${tls_secret_name}"'"}]}}' 2>/dev/null; then
# Patch host on first rule (some Ingress have multiple rules; we only set rules[0].host)
kubectl patch ingress "$ing_name" -n "${OSMO_NS}" --type=json -p '[{"op":"replace","path":"/spec/rules/0/host","value":"'"${tls_host}"'"}]' 2>/dev/null || true
# For osmo host only: give non-UI ingresses path /api or /api/... so only osmo-ui has path / (avoids 404 on /)
if [[ "$tls_host" == "$MAIN_HOSTNAME" && "$ing_name" != "osmo-ui" ]]; then
if [[ "$ing_name" == "osmo-service" ]]; then
api_path="/api"
else
api_path="/api/${ing_name#osmo-}"
fi
kubectl patch ingress "$ing_name" -n "${OSMO_NS}" --type=json -p '[{"op":"replace","path":"/spec/rules/0/http/paths/0/path","value":"'"${api_path}"'"}]' 2>/dev/null && log_info " ${ing_name}: path set to ${api_path}" || true
fi
log_success " ${ing_name}: restored with TLS (host: ${tls_host})"
else
log_warning " ${ing_name}: failed to restore"
fi
done

# Clean up backups
Expand Down Expand Up @@ -658,12 +676,13 @@ fi # end TLS_MODE

# =============================================================================
# Update OSMO service_base_url to HTTPS (only if OSMO is already deployed)
# Use auth-bypass port-forward + direct PATCH so we don't trigger Keycloak (.local redirect).
# =============================================================================
if [[ "$OSMO_DEPLOYED" == "true" ]]; then
log_info "Updating OSMO service_base_url to https://${MAIN_HOSTNAME}..."

kubectl port-forward -n "${OSMO_NS}" svc/osmo-service 8080:80 &>/dev/null &
_PF_PID=$!
start_osmo_port_forward "${OSMO_NS}" 8080
_PF_PID=$PORT_FORWARD_PID
trap 'kill $_PF_PID 2>/dev/null; wait $_PF_PID 2>/dev/null' EXIT

_pf_ready=false
Expand All @@ -676,24 +695,19 @@ if [[ "$OSMO_DEPLOYED" == "true" ]]; then
done

if [[ "$_pf_ready" == "true" ]]; then
if osmo login http://localhost:8080 --method dev --username admin 2>/dev/null; then
cat > /tmp/service_url_tls.json <<SVCEOF
cat > /tmp/service_url_tls.json <<SVCEOF
{
"service_base_url": "https://${MAIN_HOSTNAME}"
}
SVCEOF
if osmo config update SERVICE --file /tmp/service_url_tls.json --description "Enable HTTPS" 2>/dev/null; then
NEW_URL=$(curl -s "http://localhost:8080/api/configs/service" 2>/dev/null | jq -r '.service_base_url // ""')
log_success "service_base_url updated to: ${NEW_URL}"
else
log_warning "Could not update service_base_url automatically."
log_info "Run: ./08-configure-service-url.sh https://${MAIN_HOSTNAME}"
fi
rm -f /tmp/service_url_tls.json
if osmo_config_update SERVICE /tmp/service_url_tls.json "Enable HTTPS" 8080; then
NEW_URL=$(osmo_curl GET "http://localhost:8080/api/configs/service" 2>/dev/null | jq -r '.service_base_url // ""')
log_success "service_base_url updated to: ${NEW_URL}"
else
log_warning "Could not login to OSMO API. Update service_base_url manually:"
log_info " ./08-configure-service-url.sh https://${MAIN_HOSTNAME}"
log_warning "Could not update service_base_url automatically."
log_info "Run: ./08-configure-service-url.sh https://${MAIN_HOSTNAME}"
fi
rm -f /tmp/service_url_tls.json
else
log_warning "Could not connect to OSMO API. Update service_base_url manually:"
log_info " ./08-configure-service-url.sh https://${MAIN_HOSTNAME}"
Expand Down
Loading