Skip to content

Commit 29d5574

Browse files
author
Timothy Le
committed
OSMO IAM
working public endpoint SSO
Signed-off-by: Timothy Le <tle@nebius.com>

installing oidc client in script
Signed-off-by: Timothy Le <tle@nebius.com>

basic UI and CLI login working
Signed-off-by: Timothy Le <tle@nebius.com>

fix for osmo CLI login
Signed-off-by: Timothy Le <tle@nebius.com>

sso fixes
Signed-off-by: Timothy Le <tle@nebius.com>
1 parent 26fa51c commit 29d5574

22 files changed

+1771
-125
lines changed

applications/osmo/.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ id_rsa*
2424
.env
2525
.env.*
2626
!.env.example
27+
osmo-deploy.env
2728

2829
# IDE
2930
.idea/

applications/osmo/README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ Deploy [NVIDIA OSMO](https://nvidia.github.io/OSMO/main/user_guide/index.html) o
2222
| No External DNS service | Manual DNS configuration required | Not addressed |
2323
| No managed SSL/TLS service | Manual certificate management | Not addressed |
2424
| No public Load Balancer (ALB/NLB) | Use port-forwarding or WireGuard VPN for access | Workaround in place |
25-
| IDP integration for Nebius | Using OSMO dev auth mode; Keycloak available but not integrated | TBD |
25+
| IDP integration for Nebius | Keycloak + Nebius SSO (OIDC) supported; see [Authentication](deploy/example/002-setup/AUTHENTICATION.md) | Done |
2626
| Nebius Observability Stack integration | Using self-deployed Prometheus/Grafana/Loki | TODO |
2727
| Single cluster for Control Plane + Backend | Using 1 MK8s cluster for both; production separation TBD | Discuss with Nebius |
2828

@@ -234,7 +234,7 @@ See [Terraform README](deploy/001-iac/README.md) for configuration options, and
234234

235235
The script automatically:
236236
- Starts a port-forward to OSMO service
237-
- Logs in using dev method (since Keycloak auth is disabled)
237+
- Logs in via Keycloak (or dev method if Keycloak is disabled)
238238
- Creates a service token for the backend operator
239239
- Deploys the backend operator
240240
- Cleans up the port-forward

applications/osmo/deploy/example/000-prerequisites/secrets-init_deprecated.sh

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,17 @@ echo " OSMO Secrets Initialization"
3232
echo "========================================"
3333
echo ""
3434

35+
# If NEBIUS_PROJECT_ID is not set, source nebius-env-init.sh from this script's directory (so this script works when run directly, without sourcing the env file first)
36+
if [[ -z "${NEBIUS_PROJECT_ID:-}" ]]; then
37+
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)"
38+
if [[ -f "${SCRIPT_DIR}/nebius-env-init.sh" ]]; then
39+
echo "Sourcing ${SCRIPT_DIR}/nebius-env-init.sh (NEBIUS_PROJECT_ID not set)..."
40+
# shellcheck source=./nebius-env-init.sh
41+
source "${SCRIPT_DIR}/nebius-env-init.sh"
42+
echo ""
43+
fi
44+
fi
45+
3546
# -----------------------------------------------------------------------------
3647
# Helper Functions
3748
# -----------------------------------------------------------------------------

applications/osmo/deploy/example/001-iac/outputs.tf

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -167,9 +167,10 @@ output "next_steps" {
167167
./01-deploy-gpu-infrastructure.sh
168168
./02-deploy-observability.sh
169169
./03-deploy-nginx-ingress.sh
170-
./04-enable-tls.sh <hostname>
171-
./05-deploy-osmo-control-plane.sh
172-
./06-deploy-osmo-backend.sh
170+
./03b-enable-tls.sh <hostname> # optional, recommended; omit <hostname> to use OSMO_INGRESS_HOSTNAME
171+
./04-deploy-osmo-control-plane.sh
172+
./05-deploy-osmo-backend.sh
173+
./06-configure-storage.sh
173174
174175
${var.enable_managed_postgresql ? "PostgreSQL Connection (Managed):\n Host: ${module.platform.postgresql_host}\n Port: ${module.platform.postgresql_port}\n Database: ${module.platform.postgresql_database}\n Username: ${module.platform.postgresql_username}" : "PostgreSQL: Using in-cluster PostgreSQL (deployed via Helm in 05-deploy-osmo-control-plane.sh)"}
175176

applications/osmo/deploy/example/002-setup/03-deploy-nginx-ingress.sh

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
# We do not use the quick-start umbrella chart here (Nebius uses managed DB, etc.),
1010
# so we install the controller explicitly. Not a duplicate of OSMO—same upstream chart.
1111
#
12-
# Run before 05-deploy-osmo-control-plane.sh.
12+
# Run before 04-deploy-osmo-control-plane.sh (and optionally 03b-enable-tls.sh).
1313
# See: https://kubernetes.github.io/ingress-nginx/deploy/
1414

1515
set -e
@@ -84,6 +84,7 @@ echo "========================================"
8484
log_success "NGINX Ingress deployment complete"
8585
echo "========================================"
8686
echo ""
87-
echo "Next: run 04-enable-tls.sh <hostname> (optional, recommended)"
88-
echo " then 05-deploy-osmo-control-plane.sh"
87+
echo "Next: run 03b-enable-tls.sh <hostname> (optional, recommended)"
88+
echo " - If you omit <hostname>, it will use OSMO_INGRESS_HOSTNAME."
89+
echo "Then: run 04-deploy-osmo-control-plane.sh"
8990
echo ""

applications/osmo/deploy/example/002-setup/04-enable-tls.sh renamed to applications/osmo/deploy/example/002-setup/03b-enable-tls.sh

Lines changed: 53 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
# 1) cert-manager (default) — automated HTTP-01 challenges via in-cluster cert-manager
77
# 2) certbot — interactive manual DNS-01 challenges via local certbot binary
88
#
9-
# Set OSMO_TLS_MODE=certbot or OSMO_TLS_MODE=cert-manager to skip the prompt.
9+
# Default: OSMO_TLS_MODE=cert-manager (no prompt). Set to certbot to use certbot instead.
1010
#
1111
# Can be run at two points in the deployment flow:
1212
#
@@ -24,10 +24,12 @@
2424
# (A record for cert-manager/HTTP-01; TXT record for certbot/DNS-01)
2525
#
2626
# Usage:
27-
# ./04-enable-tls.sh [hostname]
27+
# ./03b-enable-tls.sh [hostname]
28+
# - If [hostname] is omitted, the script uses OSMO_INGRESS_HOSTNAME.
2829
#
2930
# Optional environment variables:
30-
# OSMO_TLS_MODE - "cert-manager" or "certbot" (skips prompt)
31+
# OSMO_TLS_MODE - "cert-manager" (default) or "certbot"
32+
# OSMO_TLS_SKIP_DNS_CONFIRM - set to "true" to skip DNS confirmation prompt (auto-set for nip.io hostnames)
3133
# OSMO_TLS_EMAIL - Email for Let's Encrypt (default: noreply@<domain>)
3234
# OSMO_TLS_SECRET_NAME - K8s Secret name for certificate (default: osmo-tls)
3335
# LETSENCRYPT_EMAIL - Alias for OSMO_TLS_EMAIL (certbot path)
@@ -75,14 +77,14 @@ check_kubectl || exit 1
7577
log_info "Hostname: ${MAIN_HOSTNAME}"
7678
log_info "TLS secret: ${TLS_SECRET}"
7779

78-
# Keycloak auth subdomain support
80+
# Keycloak auth hostname support: uses KEYCLOAK_HOSTNAME if set, otherwise auth-<main hostname> (a sibling host rather than a subdomain, e.g. auth-osmo.89-169-122-246.nip.io for nip.io)
7981
KC_TLS_SECRET="${KEYCLOAK_TLS_SECRET_NAME:-osmo-tls-auth}"
8082
AUTH_HOSTNAME=""
8183
if [[ "${DEPLOY_KEYCLOAK:-false}" == "true" ]]; then
8284
if [[ -n "${KEYCLOAK_HOSTNAME:-}" ]]; then
8385
AUTH_HOSTNAME="${KEYCLOAK_HOSTNAME}"
8486
else
85-
AUTH_HOSTNAME="auth.${MAIN_HOSTNAME}"
87+
AUTH_HOSTNAME="auth-${MAIN_HOSTNAME}"
8688
fi
8789
log_info "Keycloak auth hostname: ${AUTH_HOSTNAME}"
8890
log_info "Keycloak TLS secret: ${KC_TLS_SECRET}"
@@ -148,7 +150,13 @@ else
148150
echo "Let's Encrypt HTTP-01 challenges require DNS to resolve to the LoadBalancer."
149151
fi
150152
echo ""
151-
read_prompt_var "Press Enter once DNS records are configured (or type 'skip' to skip DNS check)" DNS_CONFIRM ""
153+
# Skip DNS confirmation when using nip.io (resolves automatically) or when OSMO_TLS_SKIP_DNS_CONFIRM is set
154+
if [[ "${MAIN_HOSTNAME}" == *"nip.io"* || "${OSMO_TLS_SKIP_DNS_CONFIRM:-false}" == "true" ]]; then
155+
DNS_CONFIRM="skip"
156+
log_info "Skipping DNS confirmation (nip.io or OSMO_TLS_SKIP_DNS_CONFIRM=true)"
157+
else
158+
read_prompt_var "Press Enter once DNS records are configured (or type 'skip' to skip DNS check)" DNS_CONFIRM ""
159+
fi
152160

153161
# Verify DNS resolves to the LoadBalancer IP
154162
if [[ "$DNS_CONFIRM" != "skip" ]]; then
@@ -595,42 +603,52 @@ EOF
595603

596604
# -------------------------------------------------------------------------
597605
# Restore OSMO Ingress resources with TLS (Mode B)
606+
# Restore osmo-ui first so it is oldest for host+path / and wins in NGINX merge (avoids 404 on /).
598607
# -------------------------------------------------------------------------
599608
if [[ "$OSMO_DEPLOYED" == "true" && "$CERT_READY" == "True" ]]; then
600609
log_info "Restoring OSMO Ingress resources with TLS..."
601610

611+
# Order: osmo-ui first so it is oldest for host osmo.* and wins path / (avoids 404 on /)
612+
RESTORE_ORDER=()
602613
for ing_name in "${REMOVED_INGRESSES[@]}"; do
614+
if [[ "$ing_name" == "osmo-ui" ]]; then
615+
RESTORE_ORDER=("$ing_name" "${RESTORE_ORDER[@]}")
616+
else
617+
RESTORE_ORDER+=("$ing_name")
618+
fi
619+
done
620+
for ing_name in "${RESTORE_ORDER[@]}"; do
603621
backup_file="/tmp/osmo-tls-backup/${ing_name}.yaml"
604622
[[ ! -f "$backup_file" ]] && continue
605623

606-
# Determine which hostname/secret this ingress should use
624+
# Determine which hostname/secret this ingress should use (keycloak = auth host, rest = main host)
607625
local_host=$(yq -r '.spec.rules[0].host // ""' "$backup_file" 2>/dev/null || \
608626
python3 -c "import yaml,sys; d=yaml.safe_load(open('$backup_file')); print(d.get('spec',{}).get('rules',[{}])[0].get('host',''))" 2>/dev/null || echo "")
609627
tls_secret_name="${TLS_SECRET}"
610628
tls_host="${MAIN_HOSTNAME}"
611-
if [[ "$local_host" == *"auth."* && -n "$AUTH_HOSTNAME" && "$AUTH_CERT_READY" == "True" ]]; then
629+
if [[ ( "$ing_name" == "keycloak" || "$local_host" == *"auth"* ) && -n "$AUTH_HOSTNAME" && "$AUTH_CERT_READY" == "True" ]]; then
612630
tls_secret_name="${KC_TLS_SECRET}"
613631
tls_host="${AUTH_HOSTNAME}"
614632
fi
615633

616-
# Re-apply the backup, then patch in TLS (no cert-manager annotation)
634+
# Re-apply the backup, then patch host + TLS so Ingress matches intended hostname (not stale .local from backup)
617635
kubectl apply -f "$backup_file" 2>/dev/null || true
618-
kubectl patch ingress "$ing_name" -n "${OSMO_NS}" --type=merge -p "$(cat <<PATCH
619-
{
620-
"metadata": {
621-
"annotations": {
622-
"nginx.ingress.kubernetes.io/ssl-redirect": "true"
623-
}
624-
},
625-
"spec": {
626-
"tls": [{
627-
"hosts": ["${tls_host}"],
628-
"secretName": "${tls_secret_name}"
629-
}]
630-
}
631-
}
632-
PATCH
633-
)" && log_success " ${ing_name}: restored with TLS" || log_warning " ${ing_name}: failed to restore"
636+
if kubectl patch ingress "$ing_name" -n "${OSMO_NS}" --type=merge -p '{"metadata":{"annotations":{"nginx.ingress.kubernetes.io/ssl-redirect":"true"}},"spec":{"tls":[{"hosts":["'"${tls_host}"'"],"secretName":"'"${tls_secret_name}"'"}]}}' 2>/dev/null; then
637+
# Patch the host on the first rule (some Ingresses have multiple rules; only rules[0].host is set here)
638+
kubectl patch ingress "$ing_name" -n "${OSMO_NS}" --type=json -p '[{"op":"replace","path":"/spec/rules/0/host","value":"'"${tls_host}"'"}]' 2>/dev/null || true
639+
# For osmo host only: give non-UI ingresses path /api or /api/... so only osmo-ui has path / (avoids 404 on /)
640+
if [[ "$tls_host" == "$MAIN_HOSTNAME" && "$ing_name" != "osmo-ui" ]]; then
641+
if [[ "$ing_name" == "osmo-service" ]]; then
642+
api_path="/api"
643+
else
644+
api_path="/api/${ing_name#osmo-}"
645+
fi
646+
kubectl patch ingress "$ing_name" -n "${OSMO_NS}" --type=json -p '[{"op":"replace","path":"/spec/rules/0/http/paths/0/path","value":"'"${api_path}"'"}]' 2>/dev/null && log_info " ${ing_name}: path set to ${api_path}" || true
647+
fi
648+
log_success " ${ing_name}: restored with TLS (host: ${tls_host})"
649+
else
650+
log_warning " ${ing_name}: failed to restore"
651+
fi
634652
done
635653

636654
# Clean up backups
@@ -658,12 +676,13 @@ fi # end TLS_MODE
658676

659677
# =============================================================================
660678
# Update OSMO service_base_url to HTTPS (only if OSMO is already deployed)
679+
# Use auth-bypass port-forward + direct PATCH so we don't trigger Keycloak (.local redirect).
661680
# =============================================================================
662681
if [[ "$OSMO_DEPLOYED" == "true" ]]; then
663682
log_info "Updating OSMO service_base_url to https://${MAIN_HOSTNAME}..."
664683

665-
kubectl port-forward -n "${OSMO_NS}" svc/osmo-service 8080:80 &>/dev/null &
666-
_PF_PID=$!
684+
start_osmo_port_forward "${OSMO_NS}" 8080
685+
_PF_PID=$PORT_FORWARD_PID
667686
trap 'kill $_PF_PID 2>/dev/null; wait $_PF_PID 2>/dev/null' EXIT
668687

669688
_pf_ready=false
@@ -676,24 +695,19 @@ if [[ "$OSMO_DEPLOYED" == "true" ]]; then
676695
done
677696

678697
if [[ "$_pf_ready" == "true" ]]; then
679-
if osmo login http://localhost:8080 --method dev --username admin 2>/dev/null; then
680-
cat > /tmp/service_url_tls.json <<SVCEOF
698+
cat > /tmp/service_url_tls.json <<SVCEOF
681699
{
682700
"service_base_url": "https://${MAIN_HOSTNAME}"
683701
}
684702
SVCEOF
685-
if osmo config update SERVICE --file /tmp/service_url_tls.json --description "Enable HTTPS" 2>/dev/null; then
686-
NEW_URL=$(curl -s "http://localhost:8080/api/configs/service" 2>/dev/null | jq -r '.service_base_url // ""')
687-
log_success "service_base_url updated to: ${NEW_URL}"
688-
else
689-
log_warning "Could not update service_base_url automatically."
690-
log_info "Run: ./08-configure-service-url.sh https://${MAIN_HOSTNAME}"
691-
fi
692-
rm -f /tmp/service_url_tls.json
703+
if osmo_config_update SERVICE /tmp/service_url_tls.json "Enable HTTPS" 8080; then
704+
NEW_URL=$(osmo_curl GET "http://localhost:8080/api/configs/service" 2>/dev/null | jq -r '.service_base_url // ""')
705+
log_success "service_base_url updated to: ${NEW_URL}"
693706
else
694-
log_warning "Could not login to OSMO API. Update service_base_url manually:"
695-
log_info " ./08-configure-service-url.sh https://${MAIN_HOSTNAME}"
707+
log_warning "Could not update service_base_url automatically."
708+
log_info "Run: ./08-configure-service-url.sh https://${MAIN_HOSTNAME}"
696709
fi
710+
rm -f /tmp/service_url_tls.json
697711
else
698712
log_warning "Could not connect to OSMO API. Update service_base_url manually:"
699713
log_info " ./08-configure-service-url.sh https://${MAIN_HOSTNAME}"

0 commit comments

Comments
 (0)