From 53d0472995435c59092b04bc01996d1790b8ce8b Mon Sep 17 00:00:00 2001 From: chiragkyal Date: Thu, 30 Oct 2025 14:21:11 +0530 Subject: [PATCH] Add OLM Plugin for Day-2 Operator Management Signed-off-by: chiragkyal --- .claude-plugin/marketplace.json | 4 +- PLUGINS.md | 11 +- docs/data.json | 58 +++- plugins/olm/.claude-plugin/plugin.json | 6 +- plugins/olm/README.md | 453 +++++++++++++++++++++++-- plugins/olm/commands/approve.md | 305 +++++++++++++++++ plugins/olm/commands/catalog.md | 433 +++++++++++++++++++++++ plugins/olm/commands/diagnose.md | 410 ++++++++++++++++++++++ plugins/olm/commands/install.md | 272 +++++++++++++++ plugins/olm/commands/list.md | 174 ++++++++++ plugins/olm/commands/search.md | 247 ++++++++++++++ plugins/olm/commands/status.md | 351 +++++++++++++++++++ plugins/olm/commands/uninstall.md | 392 +++++++++++++++++++++ plugins/olm/commands/upgrade.md | 349 +++++++++++++++++++ 14 files changed, 3435 insertions(+), 30 deletions(-) create mode 100644 plugins/olm/commands/approve.md create mode 100644 plugins/olm/commands/catalog.md create mode 100644 plugins/olm/commands/diagnose.md create mode 100644 plugins/olm/commands/install.md create mode 100644 plugins/olm/commands/list.md create mode 100644 plugins/olm/commands/search.md create mode 100644 plugins/olm/commands/status.md create mode 100644 plugins/olm/commands/uninstall.md create mode 100644 plugins/olm/commands/upgrade.md diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index 1cfb4c7..e290892 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -42,7 +42,7 @@ { "name": "olm", "source": "./plugins/olm", - "description": "OLM (Operator Lifecycle Manager) debugging and analysis tools" + "description": "OLM (Operator Lifecycle Manager) plugin for operator management and debugging" }, { "name": "prow-job", @@ -70,4 +70,4 @@ "description": "A plugin to analyze and report on must-gather data" } ] -} +} \ No newline at end of file diff 
--git a/PLUGINS.md b/PLUGINS.md index bb95513..f51f130 100644 --- a/PLUGINS.md +++ b/PLUGINS.md @@ -93,10 +93,19 @@ See [plugins/must-gather/README.md](plugins/must-gather/README.md) for detailed ### Olm Plugin -OLM (Operator Lifecycle Manager) debugging and analysis tools +OLM (Operator Lifecycle Manager) plugin for operator management and debugging **Commands:** +- **`/olm:approve` ` [namespace] [--all]`** - Approve pending InstallPlans for operator installations and upgrades +- **`/olm:catalog` ` [arguments]`** - Manage catalog sources for discovering and installing operators - **`/olm:debug` ` [olm-version]`** - Debug OLM issues using must-gather logs and source code analysis +- **`/olm:diagnose` `[operator-name] [namespace] [--fix] [--cluster]`** - Diagnose and optionally fix common OLM and operator issues +- **`/olm:install` ` [namespace] [channel] [source] [--approval=Automatic|Manual]`** - Install a day-2 operator using Operator Lifecycle Manager +- **`/olm:list` `[namespace] [--all-namespaces]`** - List installed operators in the cluster +- **`/olm:search` `[query] [--catalog ]`** - Search for available operators in catalog sources +- **`/olm:status` ` [namespace]`** - Get detailed status and health information for an operator +- **`/olm:uninstall` ` [namespace] [--remove-crds] [--remove-namespace]`** - Uninstall a day-2 operator and optionally remove its resources +- **`/olm:upgrade` ` [namespace] [--channel=] [--approve]`** - Update an operator to the latest version or switch channels See [plugins/olm/README.md](plugins/olm/README.md) for detailed documentation. 
diff --git a/docs/data.json b/docs/data.json index 7165817..b84b267 100644 --- a/docs/data.json +++ b/docs/data.json @@ -246,14 +246,68 @@ }, { "name": "olm", - "description": "OLM (Operator Lifecycle Manager) debugging and analysis tools", - "version": "0.0.1", + "description": "OLM (Operator Lifecycle Manager) plugin for operator management and debugging", + "version": "0.1.0", "commands": [ + { + "name": "approve", + "description": "Approve pending InstallPlans for operator installations and upgrades", + "synopsis": "/olm:approve [namespace] [--all]", + "argument_hint": " [namespace] [--all]" + }, + { + "name": "catalog", + "description": "Manage catalog sources for discovering and installing operators", + "synopsis": "/olm:catalog list", + "argument_hint": " [arguments]" + }, { "name": "debug", "description": "Debug OLM issues using must-gather logs and source code analysis", "synopsis": "/olm:debug [olm-version]", "argument_hint": " [olm-version]" + }, + { + "name": "diagnose", + "description": "Diagnose and optionally fix common OLM and operator issues", + "synopsis": "/olm:diagnose [operator-name] [namespace] [--fix] [--cluster]", + "argument_hint": "[operator-name] [namespace] [--fix] [--cluster]" + }, + { + "name": "install", + "description": "Install a day-2 operator using Operator Lifecycle Manager", + "synopsis": "/olm:install [namespace] [channel] [source] [--approval=Automatic|Manual]", + "argument_hint": " [namespace] [channel] [source] [--approval=Automatic|Manual]" + }, + { + "name": "list", + "description": "List installed operators in the cluster", + "synopsis": "/olm:list [namespace] [--all-namespaces]", + "argument_hint": "[namespace] [--all-namespaces]" + }, + { + "name": "search", + "description": "Search for available operators in catalog sources", + "synopsis": "/olm:search [query] [--catalog ]", + "argument_hint": "[query] [--catalog ]" + }, + { + "name": "status", + "description": "Get detailed status and health information for an 
operator", + "synopsis": "/olm:status [namespace]", + "argument_hint": " [namespace]" + }, + { + "name": "uninstall", + "description": "Uninstall a day-2 operator and optionally remove its resources", + "synopsis": "/olm:uninstall [namespace] [--remove-crds] [--remove-namespace]", + "argument_hint": " [namespace] [--remove-crds] [--remove-namespace]" + }, + { + "name": "upgrade", + "description": "Update an operator to the latest version or switch channels", + "synopsis": "/olm:upgrade [namespace] [--channel=] [--approve]", + "argument_hint": " [namespace] [--channel=] [--approve]" } ], "skills": [], diff --git a/plugins/olm/.claude-plugin/plugin.json b/plugins/olm/.claude-plugin/plugin.json index 7d8c442..127779b 100644 --- a/plugins/olm/.claude-plugin/plugin.json +++ b/plugins/olm/.claude-plugin/plugin.json @@ -1,8 +1,8 @@ { "name": "olm", - "description": "OLM (Operator Lifecycle Manager) debugging and analysis tools", - "version": "0.0.1", + "description": "OLM (Operator Lifecycle Manager) plugin for operator management and debugging", + "version": "0.1.0", "author": { "name": "github.com/openshift-eng" } -} +} \ No newline at end of file diff --git a/plugins/olm/README.md b/plugins/olm/README.md index 31c55b3..e48fc6b 100644 --- a/plugins/olm/README.md +++ b/plugins/olm/README.md @@ -1,14 +1,299 @@ # OLM Plugin -The OLM plugin provides commands for debugging and analyzing OLM (Operator Lifecycle Manager) issues in OpenShift clusters. +A comprehensive plugin for managing and debugging Operator Lifecycle Manager (OLM) in OpenShift clusters. ## Overview -This plugin helps developers and SREs troubleshoot OLM-related issues by automatically correlating must-gather logs with the appropriate OLM source code and searching for known bugs in Jira. It supports both OLMv0 and OLMv1 architectures and intelligently selects the correct code branch based on the OpenShift version. 
+This plugin provides comprehensive OLM capabilities: + +- **Operator Discovery**: Search and discover operators across all catalog sources +- **Lifecycle Management**: Install, upgrade, and uninstall operators with intelligent defaults +- **Health Monitoring**: List and check detailed operator health status +- **Update Management**: Check for and install operator updates with approval workflows +- **Troubleshooting**: Diagnose and fix common OLM issues automatically +- **Catalog Management**: Add, remove, and manage custom catalog sources +- **Advanced Debugging**: Troubleshoot OLM issues by correlating must-gather logs with source code and known bugs in Jira +- **Safety Features**: Orphaned resource cleanup, stuck namespace detection, and confirmation prompts +- **Context-Aware**: Automatic channel discovery, namespace auto-detection, and smart recommendations + +The plugin supports both OLMv0 (traditional OLM) and OLMv1 (next-generation) architectures. + +## Prerequisites + +- Claude Code installed +- OpenShift CLI (`oc`) installed and configured +- Access to an OpenShift cluster with cluster-admin or sufficient RBAC permissions +- `git` (required for debug command) +- Network access to GitHub and Jira (for debug command) ## Commands -### `/olm:debug` +### Operator Management Commands + +#### `/olm:search` - Search for Operators + +Search for available operators in OperatorHub catalogs. 
+ +**Usage:** +```bash +/olm:search cert-manager # Search by keyword +/olm:search # List all operators +/olm:search prometheus --catalog community-operators # Search specific catalog +/olm:search external-secrets-operator --exact # Exact name match +``` + +**What it does:** +- Searches across all catalog sources or specific catalogs +- Shows operator details (versions, channels, descriptions) +- Groups results by catalog source +- Provides install commands for each operator + +**Arguments:** +- `query` (optional): Search term for filtering operators +- `--catalog ` (optional): Limit search to specific catalog +- `--exact` (optional): Only show exact name matches + +See [commands/search.md](commands/search.md) for full documentation. + +--- + +#### `/olm:install` - Install Operators + +Install operators from OperatorHub with smart defaults and verification. + +**Usage:** +```bash +/olm:install openshift-cert-manager-operator # Basic install +/olm:install openshift-cert-manager-operator my-namespace # Custom namespace +/olm:install openshift-cert-manager-operator ns stable-v1 # Specific channel +/olm:install prometheus ns stable community-operators --approval=Manual # Manual approval mode +``` + +**What it does:** +- Creates namespace and OperatorGroup automatically +- Auto-discovers default channel if not specified +- Creates Subscription with configurable approval mode +- Monitors installation progress and verifies CSV status +- Reports deployment and pod status + +**Arguments:** +- `operator-name` (required): Name of the operator +- `namespace` (optional): Target namespace (defaults to operator name) +- `channel` (optional): Subscription channel (auto-discovered if not provided) +- `source` (optional): CatalogSource name (defaults to "redhat-operators") +- `--approval=Automatic|Manual` (optional): InstallPlan approval mode (default: Automatic) + +See [commands/install.md](commands/install.md) for full documentation. 
+ +--- + +#### `/olm:list` - List Installed Operators + +View all operators installed in the cluster with health status. + +**Usage:** +```bash +/olm:list # List all operators +/olm:list cert-manager-operator # List in specific namespace +/olm:list --all-namespaces # Explicit cluster-wide view +``` + +**What it does:** +- Shows operator status, versions, and channels +- Identifies operators requiring attention (failed, upgrading, etc.) +- Provides summary statistics by status and catalog +- Suggests troubleshooting commands for problematic operators + +**Arguments:** +- `namespace` (optional): Target namespace +- `--all-namespaces` or `-A` (optional): List cluster-wide + +See [commands/list.md](commands/list.md) for full documentation. + +--- + +#### `/olm:status` - Check Operator Status + +Get comprehensive health and status information for a specific operator. + +**Usage:** +```bash +/olm:status openshift-cert-manager-operator # Auto-discover namespace +/olm:status external-secrets-operator my-namespace # Specific namespace +``` + +**What it does:** +- Shows CSV, Subscription, and InstallPlan status +- Displays available updates and upgrade information +- Lists deployments and pods with health information +- Shows recent events and warnings +- Checks for pending manual approvals +- Provides context-aware troubleshooting recommendations + +**Arguments:** +- `operator-name` (required): Name of the operator +- `namespace` (optional): Namespace (auto-discovered if not provided) + +See [commands/status.md](commands/status.md) for full documentation. + +--- + +#### `/olm:upgrade` - Update Operators + +Update operators to the latest version or switch channels. 
+ +**Usage:** +```bash +/olm:upgrade openshift-cert-manager-operator # Upgrade to latest +/olm:upgrade cert-manager ns --channel=tech-preview # Switch channel +/olm:upgrade prometheus ns --approve # Approve pending upgrade +``` + +**What it does:** +- Checks for available updates in current or different channels +- Switches operator to different channel if requested +- Approves pending InstallPlans for manual approval mode +- Monitors upgrade progress with detailed feedback +- Verifies upgrade success and reports any issues + +**Arguments:** +- `operator-name` (required): Name of the operator to upgrade +- `namespace` (optional): Namespace (auto-discovered if not provided) +- `--channel=` (optional): Switch to specified channel +- `--approve` (optional): Auto-approve pending InstallPlan + +See [commands/upgrade.md](commands/upgrade.md) for full documentation. + +--- + +#### `/olm:approve` - Approve InstallPlans + +Approve pending InstallPlans for operators with manual approval mode. + +**Usage:** +```bash +/olm:approve openshift-cert-manager-operator # Approve pending plan +/olm:approve external-secrets-operator eso-operator # Specific namespace +/olm:approve cert-manager ns --all # Approve all pending +``` + +**What it does:** +- Finds pending InstallPlans requiring manual approval +- Shows what will be installed/upgraded before approval +- Approves InstallPlans after user confirmation +- Monitors installation/upgrade execution +- Reports completion status + +**Arguments:** +- `operator-name` (required): Name of the operator +- `namespace` (optional): Namespace (auto-discovered if not provided) +- `--all` (optional): Approve all pending InstallPlans + +See [commands/approve.md](commands/approve.md) for full documentation. + +--- + +#### `/olm:uninstall` - Uninstall Operators + +Safely uninstall operators with optional resource cleanup. 
+ +**Usage:** +```bash +/olm:uninstall openshift-cert-manager-operator # Basic uninstall +/olm:uninstall operator-name my-namespace # Custom namespace +/olm:uninstall operator-name ns --remove-crds # Include CRDs +/olm:uninstall operator-name ns --remove-crds --remove-namespace # Full cleanup +``` + +**What it does:** +- Removes Subscription and CSV +- Checks for and handles orphaned custom resources +- Removes operator deployments +- Optionally removes CRDs (with cluster-wide impact warning) +- Optionally removes namespace +- Detects and handles stuck Terminating namespaces +- Provides detailed uninstallation summary +- Post-uninstall verification + +**Arguments:** +- `operator-name` (required): Name of the operator +- `namespace` (optional): Target namespace (defaults to operator name) +- `--remove-crds` (optional): Remove CRDs - **CAUTION: affects entire cluster** +- `--remove-namespace` (optional): Remove namespace +- `--force` (optional): Skip confirmation prompts + +See [commands/uninstall.md](commands/uninstall.md) for full documentation. + +--- + +#### `/olm:diagnose` - Diagnose and Fix Issues + +Diagnose common OLM and operator issues with optional auto-fix. 
+ +**Usage:** +```bash +/olm:diagnose # Cluster-wide health check +/olm:diagnose openshift-cert-manager-operator # Check specific operator +/olm:diagnose "" stuck-namespace --fix # Fix stuck namespace +/olm:diagnose --cluster --fix # Full scan and fix +``` + +**What it does:** +- Scans for orphaned CRDs from deleted operators +- Detects namespaces stuck in Terminating state +- Identifies failed operator installations +- Checks for conflicting OperatorGroups +- Verifies catalog source health +- Detects Subscription/CSV mismatches +- Lists pending manual approvals +- Generates comprehensive troubleshooting report +- Optionally attempts to fix detected issues + +**Arguments:** +- `operator-name` (optional): Specific operator to diagnose +- `namespace` (optional): Specific namespace to check +- `--fix` (optional): Attempt automatic fixes with confirmation +- `--cluster` (optional): Run cluster-wide diagnostics + +See [commands/diagnose.md](commands/diagnose.md) for full documentation. + +--- + +#### `/olm:catalog` - Manage Catalog Sources + +Manage catalog sources for operator discovery and installation. 
+ +**Usage:** +```bash +/olm:catalog list # List all catalogs +/olm:catalog add my-catalog registry.io/catalog:v1 # Add custom catalog +/olm:catalog remove my-catalog # Remove catalog +/olm:catalog refresh redhat-operators # Refresh catalog +/olm:catalog status custom-catalog # Check catalog health +``` + +**What it does:** +- Lists all catalog sources with health status +- Adds custom or private catalog sources +- Removes catalog sources (with operator usage warnings) +- Refreshes catalogs to get latest operator updates +- Checks catalog source health and connectivity +- Shows catalog pod status and troubleshooting info + +**Subcommands:** +- `list`: Show all catalog sources +- `add `: Add new catalog source +- `remove `: Remove catalog source +- `refresh `: Force catalog refresh +- `status `: Check catalog health + +See [commands/catalog.md](commands/catalog.md) for full documentation. + +--- + +### Debugging Commands + +#### `/olm:debug` - Debug OLM Issues Debug OLM issues using must-gather logs and source code analysis. @@ -39,9 +324,7 @@ Debug OLM issues using must-gather logs and source code analysis. /olm:debug "Operator upgrade from v1.0 to v2.0 fails with dependency resolution error" ~/Downloads/must-gather.local.123456 olmv0 ``` -## How It Works - -The `olm:debug` command performs the following steps: +**How it works:** 1. **Extracts OCP version** from the must-gather logs 2. **Clones appropriate repositories**: @@ -54,7 +337,7 @@ The `olm:debug` command performs the following steps: 7. **Correlates errors with source code** to identify root causes 8. 
**Generates a comprehensive analysis report** with recommendations and links to related Jira issues -## Output +**Output:** The command creates a working directory at `.work/olm-debug//` containing: @@ -64,19 +347,99 @@ The command creates a working directory at `.work/olm-debug//` contai - `known-bugs.md`: List of potentially related Jira bugs with match confidence and workarounds - `repos/`: Cloned repository directories -## Prerequisites +See [commands/debug.md](commands/debug.md) for full documentation. + +--- + +## Example Workflows + +### Quick Start - Install and Monitor + +```bash +# Search for operator +/olm:search cert-manager + +# Install operator +/olm:install openshift-cert-manager-operator + +# Check status +/olm:status openshift-cert-manager-operator + +# List all operators +/olm:list +``` + +### Production Workflow - Manual Approval + +```bash +# Install with manual approval for better control +/olm:install external-secrets-operator eso-operator stable-v0.10 redhat-operators --approval=Manual -- `git` must be installed -- Network access to GitHub and Jira (https://issues.redhat.com/) -- Valid must-gather logs from an OpenShift cluster -- (Optional) Jira credentials for full access to bug details +# Check for updates +/olm:status external-secrets-operator eso-operator + +# Upgrade when ready +/olm:upgrade external-secrets-operator eso-operator + +# Approve the upgrade +/olm:approve external-secrets-operator eso-operator +``` + +### Troubleshooting Workflow + +```bash +# Operator not working properly +/olm:status problematic-operator + +# Run diagnostics +/olm:diagnose problematic-operator + +# If issues found, attempt fixes +/olm:diagnose problematic-operator namespace --fix +``` + +### Clean Uninstall Workflow + +```bash +# Check operator status before uninstalling +/olm:status openshift-cert-manager-operator + +# Uninstall with full cleanup +/olm:uninstall openshift-cert-manager-operator cert-manager-operator --remove-crds --remove-namespace + +# 
Verify cleanup +/olm:diagnose --cluster +``` + +### Catalog Management Workflow + +```bash +# List available catalogs +/olm:catalog list + +# Add custom catalog +/olm:catalog add my-operators registry.internal.com/operators:v1.0 + +# Search for operators in new catalog +/olm:search --catalog my-operators + +# Check catalog health +/olm:catalog status my-operators +``` + +### Advanced Debugging Workflow + +```bash +# Debug OLM issues using must-gather logs +/olm:debug "CSV stuck in pending" /path/to/must-gather +``` ## OLM Version Support ### OLMv0 - Used in OpenShift 4.x (traditional OLM) - Repository: [operator-framework-olm](https://github.com/openshift/operator-framework-olm) -- Key resources: CSV, Subscription, InstallPlan +- Key resources: CSV, Subscription, InstallPlan, OperatorGroup ### OLMv1 - Next-generation OLM architecture @@ -87,28 +450,73 @@ The command creates a working directory at `.work/olm-debug//` contai ## Troubleshooting -**Issue**: Cannot determine OCP version from must-gather +### Operator Not Found +```bash +/olm:search # Search for operator +oc get packagemanifests -n openshift-marketplace # List manually +/olm:catalog list # Check catalog sources +``` + +### Installation Issues +```bash +/olm:status # Check detailed status +/olm:diagnose # Run diagnostics +oc get csv -n # Check CSV manually +oc describe csv -n # Detailed CSV info +``` + +### Upgrade Issues +```bash +/olm:status # Check for pending upgrades +/olm:approve # Approve if manual mode +/olm:diagnose --fix # Fix issues +``` + +### Uninstallation Issues +```bash +# CSV won't delete +oc get csv -n -o yaml | grep finalizers + +# Namespace stuck in Terminating +/olm:diagnose "" --fix + +# Orphaned resources +/olm:diagnose --cluster +``` + +### Catalog Source Issues +```bash +/olm:catalog status # Check catalog health +/olm:catalog refresh # Refresh catalog +oc logs -n openshift-marketplace # Check logs +``` + +### Debugging Issues + +**Cannot determine OCP version from must-gather:** 
- **Solution**: Manually specify the OCP version when prompted, or check that the must-gather is complete -**Issue**: Repository clone fails +**Repository clone fails:** - **Solution**: Check network connectivity and GitHub access. You can manually clone the repositories and point the command to them. -**Issue**: Branch not found for OCP version +**Branch not found for OCP version:** - **Solution**: The command will fall back to the `main` branch. Be aware that there may be version differences. -**Issue**: Jira access fails or returns no results -- **Solution**: Check network connectivity to https://issues.redhat.com/. The command will continue with analysis even if Jira is unavailable. For full access, you may need to authenticate. +**Jira access fails or returns no results:** +- **Solution**: Check network connectivity to https://issues.redhat.com/. The command will continue with analysis even if Jira is unavailable. -**Issue**: Too many potential bug matches returned +**Too many potential bug matches returned:** - **Solution**: Review the `known-bugs.md` file and focus on high-confidence matches. Verify each match by reading the full bug description in Jira. 
## Resources -- [OLM Documentation](https://olm.operatorframework.io/) -- [OpenShift OLM Documentation](https://docs.openshift.com/container-platform/latest/operators/understanding/olm/olm-understanding-olm.html) +- [Red Hat OpenShift: Operators Documentation](https://docs.redhat.com/en/documentation/openshift_container_platform/4.20/html/operators/) +- [Red Hat OpenShift: Administrator Tasks](https://docs.redhat.com/en/documentation/openshift_container_platform/4.20/html/operators/administrator-tasks) +- [Red Hat OpenShift: Troubleshooting Operators](https://docs.redhat.com/en/documentation/openshift_container_platform/4.20/html/operators/administrator-tasks#olm-troubleshooting-operator-issues) +- [Operator Lifecycle Manager Documentation](https://olm.operatorframework.io/) +- [OperatorHub.io](https://operatorhub.io/) - Browse operators online - [Must-gather Documentation](https://docs.openshift.com/container-platform/latest/support/gathering-cluster-data.html) - [OCPBUGS Jira Project](https://issues.redhat.com/projects/OCPBUGS/) -- [Jira REST API Documentation](https://docs.atlassian.com/jira-software/REST/latest/) ## Contributing @@ -117,6 +525,7 @@ To add new commands to this plugin: 1. Create a new `.md` file in `plugins/olm/commands/` 2. Follow the command definition format in existing commands 3. Update this README with the new command documentation +4. Run `make lint` to validate the plugin structure ## Support diff --git a/plugins/olm/commands/approve.md b/plugins/olm/commands/approve.md new file mode 100644 index 0000000..3aa5437 --- /dev/null +++ b/plugins/olm/commands/approve.md @@ -0,0 +1,305 @@ +--- +description: Approve pending InstallPlans for operator installations and upgrades +argument-hint: [namespace] [--all] +--- + +## Name +olm:approve + +## Synopsis +``` +/olm:approve [namespace] [--all] +``` + +## Description +The `olm:approve` command approves pending InstallPlans for operators with manual approval mode. 
This is required for operators that have `installPlanApproval: Manual` in their Subscription to proceed with installation or upgrades. + +This command helps you: +- Approve operator installations that are waiting for manual approval +- Approve operator upgrades +- Review what will be installed/upgraded before approval +- Batch approve multiple pending InstallPlans + +## Implementation + +The command performs the following steps: + +1. **Parse Arguments**: + - `$1`: Operator name (required) - Name of the operator + - `$2`: Namespace (optional) - Namespace where operator is installed + - If not provided, searches for the operator across all namespaces + - `$3`: Flag (optional): + - `--all`: Approve all pending InstallPlans in the namespace + +2. **Prerequisites Check**: + - Verify `oc` CLI is installed: `which oc` + - Verify cluster access: `oc whoami` + - Check if user has sufficient privileges + +3. **Locate Operator**: + - If namespace provided, verify operator exists: + ```bash + oc get subscription {operator-name} -n {namespace} --ignore-not-found + ``` + - If no namespace provided, search across all namespaces: + ```bash + oc get subscription --all-namespaces -o json | jq -r '.items[] | select(.spec.name=="{operator-name}") | .metadata.namespace' + ``` + - If not found, display error with suggestions + +4. **Check Subscription Approval Mode**: + - Get Subscription approval mode: + ```bash + oc get subscription {operator-name} -n {namespace} -o jsonpath='{.spec.installPlanApproval}' + ``` + - If mode is "Automatic", display informational message: + ``` + ℹ️ Operator '{operator-name}' has automatic approval enabled. + InstallPlans are approved automatically and don't require manual intervention. + + Current Subscription approval mode: Automatic + + To switch to manual approval mode: + oc patch subscription {operator-name} -n {namespace} \ + --type merge --patch '{"spec":{"installPlanApproval":"Manual"}}' + ``` + - Exit if automatic (no approval needed) + +5. 
**Find Pending InstallPlans**: + - Get all InstallPlans for the operator: + ```bash + oc get installplan -n {namespace} -o json + ``` + - Filter for unapproved plans related to this operator: + ```bash + oc get installplan -n {namespace} -o json | \ + jq '.items[] | select(.spec.approved==false and any(.spec.clusterServiceVersionNames[]; contains("{operator-name}")))' + ``` + - If no pending InstallPlans found: + ``` + ✓ No pending InstallPlans found for operator '{operator-name}' + + The operator is up to date or already approved. + + To check operator status: /olm:status {operator-name} {namespace} + ``` + - Exit with success + +6. **Display InstallPlan Details**: + For each pending InstallPlan, display: + ``` + ⏸️ Pending InstallPlan Found + + InstallPlan: {installplan-name} + Namespace: {namespace} + Phase: {phase} + Approved: false + + ClusterServiceVersions to be installed/upgraded: + - {csv-name-1} ({version-1}) + - {csv-name-2} ({version-2}) + + Resources to be created/updated: + - CustomResourceDefinitions: {crd-count} + - ServiceAccounts: {sa-count} + - ClusterRoles: {role-count} + - Deployments: {deployment-count} + + [If upgrade:] + Current Version: {current-version} + Target Version: {target-version} + ``` + +7. **Request User Confirmation** (skipped when the `--all` flag is given): + - Display confirmation prompt: + ``` + Do you want to approve this InstallPlan? (yes/no) + ``` + - If user says no, skip this InstallPlan + - If user says yes, proceed to approval + +8. 
**Approve InstallPlan**: + - Patch the InstallPlan to approve it: + ```bash + oc patch installplan {installplan-name} -n {namespace} \ + --type merge --patch '{"spec":{"approved":true}}' + ``` + - Verify approval: + ```bash + oc get installplan {installplan-name} -n {namespace} -o jsonpath='{.spec.approved}' + ``` + - Display confirmation: + ``` + ✓ InstallPlan approved: {installplan-name} + ``` + - Reference: https://docs.redhat.com/en/documentation/openshift_container_platform/4.20/html/operators/administrator-tasks#olm-approving-operator-upgrades_olm-updating-operators + +9. **Monitor InstallPlan Execution** (optional): + - Wait for the InstallPlan phase to change to "Complete": + ```bash + oc wait installplan/{installplan-name} -n {namespace} --for=jsonpath='{.status.phase}'=Complete --timeout=120s + ``` + - Display progress: + ``` + 🔄 InstallPlan executing... + ⏳ Installing resources... + ``` + +10. **Verify Installation/Upgrade**: + - Wait for CSV to reach "Succeeded" phase: + ```bash + oc get csv -n {namespace} -o json | \ + jq -r '.items[] | select(.status.phase=="Succeeded") | .metadata.name' + ``` + - Display result: + ``` + ✓ Operator installation/upgrade complete + + CSV: {csv-name} + Version: {version} + Phase: Succeeded + + To check operator status: /olm:status {operator-name} {namespace} + ``` + +11. **Handle Multiple InstallPlans** (if `--all` flag): + - Process all pending InstallPlans for the operator + - Display summary: + ``` + ✓ Approved {count} InstallPlan(s) + + Approved: + - {installplan-1} + - {installplan-2} + + Monitoring installation progress... + ``` + +12. **Display Approval Summary**: + ``` + ✓ Approval Complete! + + Operator: {operator-name} + Namespace: {namespace} + Approved InstallPlans: {count} + + InstallPlan Status: + - {installplan-1}: Complete + - {installplan-2}: Installing... 
+ + Monitor progress: watch oc get csv,installplan -n {namespace} + ``` + +## Return Value +- **Success**: InstallPlan(s) approved successfully +- **No Pending Plans**: No InstallPlans require approval +- **Automatic Mode**: Operator has automatic approval (no action needed) +- **Error**: Approval failed with specific error message +- **Format**: Structured output showing: + - Approved InstallPlan names + - Installation/upgrade status + - Next steps or related commands + +## Examples + +1. **Approve pending InstallPlan for an operator**: + ``` + /olm:approve openshift-cert-manager-operator + ``` + +2. **Approve with specific namespace**: + ``` + /olm:approve external-secrets-operator eso-operator + ``` + +3. **Approve all pending InstallPlans**: + ``` + /olm:approve openshift-cert-manager-operator cert-manager-operator --all + ``` + This approves all pending InstallPlans for the operator in the namespace. + +4. **Check and approve after upgrade command**: + ``` + /olm:upgrade openshift-cert-manager-operator --channel=tech-preview + # Wait for InstallPlan to be created + /olm:approve openshift-cert-manager-operator + ``` + +## Arguments +- **$1** (operator-name): Name of the operator (required) + - Example: "openshift-cert-manager-operator" + - Must match the operator's Subscription name +- **$2** (namespace): Namespace where operator is installed (optional) + - If not provided, searches all namespaces + - Example: "cert-manager-operator" +- **$3** (flag): Optional flag + - `--all`: Approve all pending InstallPlans for this operator + - Useful when multiple upgrades are pending + - Skips individual confirmation prompts + +## Notes + +- **Manual Approval Mode**: This command only works for operators with `installPlanApproval: Manual` in their Subscription +- **Automatic Operators**: Operators with automatic approval don't need this command +- **Review Before Approval**: Always review what will be installed/upgraded before approving +- **Multiple InstallPlans**: An 
operator may have multiple pending InstallPlans if updates accumulated while waiting for approval +- **InstallPlan Retention**: Approved InstallPlans remain in the namespace for audit purposes + +## Troubleshooting + +- **No pending InstallPlans**: + ```bash + # List all InstallPlans + oc get installplan -n {namespace} + + # Check if operator is in automatic mode + oc get subscription {operator-name} -n {namespace} -o jsonpath='{.spec.installPlanApproval}' + ``` + +- **InstallPlan not executing after approval**: + ```bash + # Check InstallPlan status + oc describe installplan {installplan-name} -n {namespace} + + # Check for errors + oc get events -n {namespace} --sort-by='.lastTimestamp' | grep InstallPlan + ``` + +- **CSV not reaching Succeeded phase**: + ```bash + # Check CSV status + oc describe csv -n {namespace} + + # Check operator deployment + oc get deployments -n {namespace} + + # Check operator logs + oc logs -n {namespace} deployment/{operator-deployment} + ``` + +- **Permission denied**: + ```bash + # Check if you can patch InstallPlans + oc auth can-i patch installplan -n {namespace} + ``` + +- **Multiple namespaces found**: + - Specify the namespace explicitly in the command: + ``` + /olm:approve {operator-name} {specific-namespace} + ``` + +## Related Commands + +- `/olm:status <operator-name>` - Check if InstallPlans are pending approval +- `/olm:upgrade <operator-name>` - Trigger upgrade and approve in one command +- `/olm:install <operator-name>` - Install operator with approval mode +- `/olm:list` - List operators and their approval modes + +## Additional Resources + +- [Red Hat OpenShift: Approving Operator Upgrades](https://docs.redhat.com/en/documentation/openshift_container_platform/4.20/html/operators/administrator-tasks#olm-approving-operator-upgrades_olm-updating-operators) +- [Red Hat OpenShift: Updating Installed Operators](https://docs.redhat.com/en/documentation/openshift_container_platform/4.20/html/operators/administrator-tasks#olm-updating-operators) +- [Operator Lifecycle
Manager Documentation](https://olm.operatorframework.io/) + + diff --git a/plugins/olm/commands/catalog.md b/plugins/olm/commands/catalog.md new file mode 100644 index 0000000..cd43964 --- /dev/null +++ b/plugins/olm/commands/catalog.md @@ -0,0 +1,433 @@ +--- +description: Manage catalog sources for discovering and installing operators +argument-hint: <subcommand> [arguments] +--- + +## Name +olm:catalog + +## Synopsis +``` +/olm:catalog list +/olm:catalog add <name> <image> [--namespace=openshift-marketplace] +/olm:catalog remove <name> [--namespace=openshift-marketplace] +/olm:catalog refresh <name> [--namespace=openshift-marketplace] +/olm:catalog status <name> [--namespace=openshift-marketplace] +``` + +## Description +The `olm:catalog` command manages catalog sources for operator discovery and installation. Catalog sources provide the list of operators available for installation in the cluster. + +This command helps you: +- List all available catalog sources and their health status +- Add custom or private catalog sources +- Remove catalog sources +- Refresh catalog sources to get latest operator updates + +## Implementation + +### Subcommand: list + +1. **Get All CatalogSources**: + ```bash + oc get catalogsource -n openshift-marketplace -o json + ``` + +2. **Parse CatalogSource Data**: + For each catalog, extract: + - Name: `.metadata.name` + - Display Name: `.spec.displayName` + - Publisher: `.spec.publisher` + - Source Type: `.spec.sourceType` (grpc, configmap, etc.) + - Image: `.spec.image` (for grpc type) + - Connection State: `.status.connectionState.lastObservedState` + - Last Updated: `.status.connectionState.lastConnect` + - Number of Operators: Count from PackageManifests with this catalog + +3. **Get Catalog Pod Status**: + ```bash + oc get pods -n openshift-marketplace -l olm.catalogSource={catalog-name} + ``` + +4.
**Format Output**: + ``` + ═══════════════════════════════════════════════════════════ + CATALOG SOURCES + ═══════════════════════════════════════════════════════════ + + NAME STATUS OPERATORS LAST UPDATED SOURCE TYPE + redhat-operators READY 150 2h ago grpc + certified-operators READY 45 3h ago grpc + community-operators READY 200 1h ago grpc + redhat-marketplace READY 30 4h ago grpc + custom-catalog FAILED 0 - grpc + + ═══════════════════════════════════════════════════════════ + DETAILS + ═══════════════════════════════════════════════════════════ + + redhat-operators: + Display Name: Red Hat Operators + Publisher: Red Hat + Image: registry.redhat.io/redhat/redhat-operator-index:v4.20 + Pod: redhat-operators-abc123 (Running) + + custom-catalog (FAILED): + Display Name: Custom Catalog + Publisher: My Company + Image: registry.example.com/custom-catalog:latest + Pod: custom-catalog-xyz789 (CrashLoopBackOff) + Error: ImagePullBackOff + + To troubleshoot: + /olm:catalog status custom-catalog + ``` + +### Subcommand: add + +1. **Parse Arguments**: + - `name`: Catalog source name (required) + - `image`: Catalog image (required) + - `--namespace`: Target namespace (default: openshift-marketplace) + - `--display-name`: Display name (optional) + - `--publisher`: Publisher name (optional) + +2. **Validate Image**: + - Check if image format is valid + - Optionally test image accessibility (if possible) + +3. **Create CatalogSource Manifest**: + ```yaml + apiVersion: operators.coreos.com/v1alpha1 + kind: CatalogSource + metadata: + name: {name} + namespace: {namespace} + spec: + sourceType: grpc + image: {image} + displayName: {display-name} + publisher: {publisher} + updateStrategy: + registryPoll: + interval: 30m + ``` + +4. **Apply CatalogSource**: + ```bash + oc apply -f /tmp/catalogsource-{name}.yaml + ``` + +5. **Wait for CatalogSource to be Ready**: + ```bash + oc wait --for=jsonpath='{.status.connectionState.lastObservedState}'=READY catalogsource/{name} -n {namespace} --timeout=300s + ``` + +6.
**Verify Pod is Running**: + ```bash + oc get pods -n {namespace} -l olm.catalogSource={name} + ``` + +7. **Display Result**: + ``` + ✓ Catalog source added: {name} + + Name: {name} + Namespace: {namespace} + Image: {image} + Status: READY + Pod: {pod-name} (Running) + + To search operators: /olm:search --catalog {name} + ``` + +### Subcommand: remove + +1. **Parse Arguments**: + - `name`: Catalog source name (required) + - `--namespace`: Namespace (default: openshift-marketplace) + +2. **Check if CatalogSource Exists**: + ```bash + oc get catalogsource {name} -n {namespace} --ignore-not-found + ``` + +3. **Check for Operators Using This Catalog**: + ```bash + oc get subscription --all-namespaces -o json | \ + jq -r '.items[] | select(.spec.source=="{name}") | "\(.metadata.namespace)/\(.metadata.name)"' + ``` + +4. **Display Warning** (if operators found): + ``` + WARNING: The following operators are using this catalog: + - namespace-1/operator-1 + - namespace-2/operator-2 + + Removing this catalog will prevent these operators from receiving updates. + + Do you want to continue? (yes/no) + ``` + +5. **Delete CatalogSource**: + ```bash + oc delete catalogsource {name} -n {namespace} + ``` + +6. **Wait for Pod to be Deleted**: + ```bash + oc wait --for=delete pod -l olm.catalogSource={name} -n {namespace} --timeout=60s + ``` + +7. **Display Result**: + ``` + ✓ Catalog source removed: {name} + ``` + +### Subcommand: refresh + +1. **Parse Arguments**: + - `name`: Catalog source name (required) + - `--namespace`: Namespace (default: openshift-marketplace) + +2. **Get Current CatalogSource**: + ```bash + oc get catalogsource {name} -n {namespace} -o json + ``` + +3. **Trigger Refresh by Deleting Pod**: + ```bash + oc delete pod -n {namespace} -l olm.catalogSource={name} + ``` + - This forces OLM to recreate the pod and re-fetch catalog data + +4. 
**Wait for New Pod to be Ready**: + ```bash + oc wait --for=condition=Ready pod -l olm.catalogSource={name} -n {namespace} --timeout=300s + ``` + +5. **Verify Catalog is Updated**: + ```bash + oc get catalogsource {name} -n {namespace} -o json | \ + jq -r '.status.connectionState.lastConnect' + ``` + +6. **Display Result**: + ``` + ✓ Catalog source refreshed: {name} + + Last Updated: {timestamp} + Status: READY + Pod: {pod-name} (Running) + + New operators may now be available: /olm:search --catalog {name} + ``` + +### Subcommand: status + +1. **Parse Arguments**: + - `name`: Catalog source name (required) + - `--namespace`: Namespace (default: openshift-marketplace) + +2. **Get CatalogSource Details**: + ```bash + oc get catalogsource {name} -n {namespace} -o json + ``` + +3. **Get Pod Details**: + ```bash + oc get pods -n {namespace} -l olm.catalogSource={name} -o json + ``` + +4. **Get Recent Events**: + ```bash + oc get events -n {namespace} --field-selector involvedObject.name={name} --sort-by='.lastTimestamp' + ``` + +5. **Count Available Operators**: + ```bash + oc get packagemanifests -n openshift-marketplace -o json | \ + jq -r '.items[] | select(.status.catalogSource=="{name}") | .metadata.name' | wc -l + ``` + +6. **Verify Catalog Connectivity**: + - Check if catalog is serving content by verifying PackageManifest count > 0 + - If count is 0 but pod is Running, indicates connectivity or catalog index issues + - Review catalog pod logs for gRPC errors, image pull issues, or index corruption: + ```bash + oc logs -n {namespace} {catalog-pod-name} + ``` + +7.
**Format Comprehensive Status Report**: + ``` + ═══════════════════════════════════════════════════════════ + CATALOG SOURCE STATUS: {name} + ═══════════════════════════════════════════════════════════ + + General Information: + Name: {name} + Namespace: {namespace} + Display Name: {display-name} + Publisher: {publisher} + Source Type: {source-type} + Image: {image} + + Connection Status: + State: {state} (READY | CONNECTING | CONNECTION_FAILED) + Last Updated: {timestamp} + Last Successful: {timestamp} + + Pod Status: + Name: {pod-name} + Status: {status} (Running | CrashLoopBackOff | ImagePullBackOff) + Ready: {ready-containers}/{total-containers} + Restarts: {restart-count} + Age: {age} + + Catalog Content: + Operators Available: {count} + + [If issues detected:] + ⚠️ Issues Detected: + - Pod in CrashLoopBackOff + - Last update: 24h ago (stale) + - Connection state: CONNECTION_FAILED + + Recent Events: + {timestamp} Warning: Failed to pull image + {timestamp} Warning: Back-off restarting failed container + + Troubleshooting Steps: + 1. Check pod logs: oc logs -n {namespace} {pod-name} + 2. Check image accessibility + 3. Refresh catalog: /olm:catalog refresh {name} + 4. Verify network connectivity (for disconnected environments) + + Related Commands: + - Refresh: /olm:catalog refresh {name} + - List operators: /olm:search --catalog {name} + ``` + +## Return Value +- **list**: Table of all catalog sources with status +- **add**: Confirmation of added catalog with details +- **remove**: Confirmation of removed catalog +- **refresh**: Confirmation of refresh with updated timestamp +- **status**: Comprehensive status report for specific catalog + +## Examples + +1. **List all catalog sources**: + ``` + /olm:catalog list + ``` + +2. **Add custom catalog**: + ``` + /olm:catalog add my-catalog registry.example.com/my-catalog:v1.0 + ``` + +3. 
**Add catalog with metadata**: + ``` + /olm:catalog add my-catalog registry.example.com/catalog:latest \ + --display-name="My Custom Catalog" \ + --publisher="My Company" + ``` + +4. **Remove catalog**: + ``` + /olm:catalog remove my-catalog + ``` + +5. **Refresh catalog to get latest operators**: + ``` + /olm:catalog refresh redhat-operators + ``` + +6. **Check catalog health**: + ``` + /olm:catalog status custom-catalog + ``` + +7. **Add catalog for disconnected environment**: + ``` + /olm:catalog add disconnected-operators \ + mirror-registry.local:5000/olm/redhat-operators:v4.20 \ + --namespace=openshift-marketplace + ``` + +## Arguments + +### list +No arguments required. + +### add +- **name** (required): Name for the catalog source +- **image** (required): Container image containing the catalog +- **--namespace**: Target namespace (default: openshift-marketplace) +- **--display-name**: Human-readable display name +- **--publisher**: Publisher/organization name + +### remove +- **name** (required): Name of the catalog source to remove +- **--namespace**: Namespace (default: openshift-marketplace) + +### refresh +- **name** (required): Name of the catalog source to refresh +- **--namespace**: Namespace (default: openshift-marketplace) + +### status +- **name** (required): Name of the catalog source to check +- **--namespace**: Namespace (default: openshift-marketplace) + +## Troubleshooting + +- **Catalog pod failing**: + ```bash + # Check pod logs + oc logs -n openshift-marketplace {catalog-pod-name} + + # Check image pull issues + oc describe pod -n openshift-marketplace {catalog-pod-name} + ``` + +- **No operators showing up**: + ```bash + # Verify catalog is ready + /olm:catalog status {catalog-name} + + # Check PackageManifests + oc get packagemanifests -n openshift-marketplace + ``` + +- **Image pull errors (disconnected environment)**: + - Verify image registry is accessible + - Check pull secrets are configured + - Ensure image has been mirrored 
correctly + +- **Stale catalog data**: + ```bash + # Force refresh + /olm:catalog refresh {catalog-name} + ``` + +- **Connection failures**: + ```bash + # Check catalog source definition + oc get catalogsource {catalog-name} -n openshift-marketplace -o yaml + + # Run cluster diagnostics + /olm:diagnose --cluster + ``` + +## Related Commands + +- `/olm:search` - Search for operators in catalogs +- `/olm:install` - Install operators from catalogs +- `/olm:diagnose` - Diagnose catalog health issues + +## Additional Resources +- [Building Catalog Images with opm](https://olm.operatorframework.io/docs/tasks/creating-catalog-from-index/) +- [Operator Lifecycle Manager Documentation](https://olm.operatorframework.io/) + + diff --git a/plugins/olm/commands/diagnose.md b/plugins/olm/commands/diagnose.md new file mode 100644 index 0000000..83f6d27 --- /dev/null +++ b/plugins/olm/commands/diagnose.md @@ -0,0 +1,410 @@ +--- +description: Diagnose and optionally fix common OLM and operator issues +argument-hint: [operator-name] [namespace] [--fix] [--cluster] +--- + +## Name +olm:diagnose + +## Synopsis +``` +/olm:diagnose [operator-name] [namespace] [--fix] [--cluster] +``` + +## Description +The `olm:diagnose` command diagnoses common OLM and operator issues, including orphaned CRDs, stuck namespaces, failed installations, and catalog source problems. It can optionally attempt to fix detected issues automatically. + +This command helps you: +- Detect and clean up orphaned CRDs from deleted operators +- Fix namespaces stuck in Terminating state +- Identify and resolve failed operator installations +- Detect conflicting OperatorGroups +- Check catalog source health +- Identify resources preventing clean uninstallation +- Generate comprehensive troubleshooting reports + +## Implementation + +The command performs the following steps: + +1. 
**Parse Arguments**: + - `$1`: Operator name (optional) - Specific operator to diagnose + - `$2`: Namespace (optional) - Specific namespace to check + - `$3+`: Flags (optional): + - `--fix`: Automatically attempt to fix detected issues (requires confirmation) + - `--cluster`: Run cluster-wide diagnostics (catalog sources, global CRDs, etc.) + +2. **Prerequisites Check**: + - Verify `oc` CLI is installed: `which oc` + - Verify cluster access: `oc whoami` + - Check if user has cluster-admin or sufficient privileges + - Warn if running without `--fix` flag (dry-run mode) + +3. **Determine Scope**: + - **Operator-specific**: If operator name provided, focus on that operator + - **Namespace-specific**: If namespace provided, check all operators in that namespace + - **Cluster-wide**: If `--cluster` flag or no arguments, check entire cluster + +4. **Scan for Orphaned CRDs**: + - Get all CRDs in the cluster: + ```bash + oc get crd -o json + ``` + - For each CRD, check if there's a corresponding operator: + - Look for CSVs that own this CRD + - Look for active Subscriptions related to this CRD + - Identify orphaned CRDs (no owning operator found): + ```bash + # Find CRDs without active operators + # This is a simplified check - actual implementation should verify operator ownership + oc get crd -o json | jq -r '.items[] | + select(.metadata.annotations["operators.coreos.com/owner"] // "" | length == 0) | + .metadata.name' + ``` + - Check if CRs exist for orphaned CRDs: + ```bash + oc get {crd-name} --all-namespaces --ignore-not-found + ``` + - Report findings: + ``` + ⚠️ Orphaned CRDs Detected + + The following CRDs have no active operator: + - certificates.cert-manager.io (3 CR instances in 2 namespaces) + - issuers.cert-manager.io (5 CR instances in 3 namespaces) + + These CRDs may be leftovers from uninstalled operators. + + [If --fix flag:] + Do you want to delete these CRDs and their CRs? (yes/no) + WARNING: This will delete all custom resources of these types! + ``` + +5.
**Check for Stuck Namespaces**: + - Get all namespaces in Terminating state: + ```bash + oc get namespaces -o json | jq -r '.items[] | select(.status.phase=="Terminating") | .metadata.name' + ``` + - For each stuck namespace: + - Check remaining resources: + ```bash + oc api-resources --verbs=list --namespaced -o name | \ + xargs -n 1 oc get --show-kind --ignore-not-found -n {namespace} + ``` + - Check namespace finalizers: + ```bash + oc get namespace {namespace} -o jsonpath='{.metadata.finalizers}' + ``` + - Identify blocking resources + - Report findings: + ``` + ❌ Stuck Namespace Detected + + Namespace: {namespace} + State: Terminating (stuck for {duration}) + + Blocking resources: + - CustomResourceDefinition: {crd-name} (finalizer: {finalizer}) + - ServiceAccount: {sa-name} (token secret) + + Finalizers on namespace: + - kubernetes + + [If --fix flag:] + Attempted fixes: + 1. Delete remaining resources + 2. Remove finalizers from CRs + 3. Patch namespace to remove finalizers (CAUTION) + + WARNING: Force-deleting namespace can cause cluster instability. + ``` + +6. 
**Scan for Failed Operator Installations**: + - Get all CSVs not in "Succeeded" phase: + ```bash + oc get csv --all-namespaces -o json | \ + jq -r '.items[] | select(.status.phase != "Succeeded") | "\(.metadata.namespace)/\(.metadata.name): \(.status.phase)"' + ``` + - For each failed CSV: + - Get failure reason: `.status.reason` + - Get failure message: `.status.message` + - Check related InstallPlan status + - Check deployment status + - Check recent events + - Report findings: + ``` + ❌ Failed Operator Installation + + Operator: {operator-name} + Namespace: {namespace} + CSV: {csv-name} + Phase: Failed + Reason: {reason} + Message: {message} + + Related InstallPlan: {installplan-name} (Phase: {phase}) + + Recent Events: + - {timestamp} Warning: {event-message} + + Troubleshooting suggestions: + - Check operator logs: oc logs -n {namespace} deployment/{deployment} + - Check image pull issues: oc describe pod -n {namespace} + - Verify catalog source health + - Check RBAC permissions + ``` + +7. **Check for Conflicting OperatorGroups**: + - Get all OperatorGroups per namespace: + ```bash + oc get operatorgroup --all-namespaces -o json + ``` + - Identify namespaces with multiple OperatorGroups (conflict): + ```bash + oc get operatorgroup --all-namespaces -o json | \ + jq -r '.items | group_by(.metadata.namespace) | .[] | select(length > 1) | .[0].metadata.namespace' + ``` + - Check for OperatorGroups with overlapping target namespaces + - Report findings: + ``` + ⚠️ Conflicting OperatorGroups Detected + + Namespace: {namespace} + OperatorGroups: {count} + - {og-1} (targets: {target-namespaces-1}) + - {og-2} (targets: {target-namespaces-2}) + + Multiple OperatorGroups in a namespace can cause conflicts. + Only one OperatorGroup should exist per namespace. + + [If --fix flag:] + Keep which OperatorGroup? (1/2) + ``` + +8. 
**Verify Catalog Source Health** (if `--cluster` flag): + - Get all CatalogSources: + ```bash + oc get catalogsource -n openshift-marketplace -o json + ``` + - For each catalog: + - Check status: `.status.connectionState.lastObservedState` + - Check pod status + - Check last update time + - Verify grpc connection + - Report findings: + ``` + 🔍 Catalog Source Health Check + + ✓ redhat-operators: READY (last updated: 2h ago) + ✓ certified-operators: READY (last updated: 3h ago) + ✓ community-operators: READY (last updated: 1h ago) + ❌ custom-catalog: CONNECTION_FAILED (pod: CrashLoopBackOff) + + [If issues found:] + Unhealthy Catalog: custom-catalog + Pod: custom-catalog-abc123 (Status: CrashLoopBackOff) + + To troubleshoot: + oc logs -n openshift-marketplace custom-catalog-abc123 + oc describe catalogsource custom-catalog -n openshift-marketplace + ``` + +9. **Check for Subscription/CSV Mismatches**: + - Get all Subscriptions: + ```bash + oc get subscription --all-namespaces -o json + ``` + - For each Subscription: + - Compare `installedCSV` with `currentCSV` + - Check if CSV exists + - Verify CSV phase + - Report findings: + ``` + ⚠️ Subscription/CSV Mismatch + + Operator: {operator-name} + Namespace: {namespace} + Installed CSV: {installed-csv} + Current CSV: {current-csv} + + CSV {installed-csv} not found in namespace. + This may indicate a failed installation or upgrade. + + Suggested fix: + oc delete subscription {operator-name} -n {namespace} + /olm:install {operator-name} {namespace} + ``` + +10. 
**Check for Pending Manual Approvals**: + - Find all unapproved InstallPlans: + ```bash + oc get installplan --all-namespaces -o json | \ + jq -r '.items[] | select(.spec.approved==false)' + ``` + - Report findings: + ``` + ℹ️ Pending Manual Approvals + + The following operators have pending InstallPlans requiring approval: + + - Operator: openshift-cert-manager-operator + Namespace: cert-manager-operator + InstallPlan: install-abc123 + Target Version: v1.14.0 + To approve: /olm:approve openshift-cert-manager-operator cert-manager-operator + + - Operator: external-secrets-operator + Namespace: eso-operator + InstallPlan: install-def456 + Target Version: v0.11.0 + To approve: /olm:approve external-secrets-operator eso-operator + ``` + +11. **Generate Comprehensive Report**: + ``` + ═══════════════════════════════════════════════════════════ + OLM HEALTH CHECK REPORT + ═══════════════════════════════════════════════════════════ + + Scan Scope: [Operator-specific | Namespace | Cluster-wide] + Scan Time: {timestamp} + + ✓ HEALTHY CHECKS: {count} + - Catalog sources operational + - No conflicting OperatorGroups + - All CSVs in Succeeded phase + + ⚠️ WARNINGS: {count} + - {warning-count} orphaned CRDs detected + - {warning-count} pending manual approvals + + ❌ ERRORS: {count} + - {error-count} stuck namespaces + - {error-count} failed operator installations + - {error-count} unhealthy catalog sources + + ═══════════════════════════════════════════════════════════ + DETAILED FINDINGS + ═══════════════════════════════════════════════════════════ + + [Details for each finding...] + + ═══════════════════════════════════════════════════════════ + RECOMMENDATIONS + ═══════════════════════════════════════════════════════════ + + 1. Clean up orphaned CRDs: /olm:diagnose --fix + 2. Fix stuck namespace: /olm:diagnose {namespace} --fix + 3. 
Approve pending upgrades: /olm:approve {operator-name} + + For more details on troubleshooting, see: + https://docs.redhat.com/en/documentation/openshift_container_platform/4.20/html/operators/administrator-tasks#olm-troubleshooting-operator-issues + ``` + +12. **Auto-Fix Issues** (if `--fix` flag): + - For each detected issue, ask for confirmation + - Attempt fixes based on issue type: + - **Orphaned CRDs**: Delete CRs first, then CRDs + - **Stuck namespaces**: Delete remaining resources, remove finalizers + - **Failed installations**: Restart by deleting and recreating + - **Conflicting OperatorGroups**: Remove unwanted OperatorGroup + - **Unhealthy catalogs**: Restart catalog pod + - Display results of each fix attempt + - Generate final summary + +## Return Value +- **Success**: Report generated with findings +- **Issues Found**: Detailed report with warnings and errors +- **Fixed**: Issues resolved (if `--fix` flag used) +- **Format**: Structured report showing: + - Summary of health checks + - Detailed findings for each issue + - Recommendations and next steps + - Links to documentation + +## Examples + +1. **Check specific operator**: + ``` + /olm:diagnose openshift-cert-manager-operator + ``` + +2. **Cluster-wide health check**: + ``` + /olm:diagnose --cluster + ``` + +3. **Diagnose and fix issues**: + ``` + /olm:diagnose openshift-cert-manager-operator cert-manager-operator --fix + ``` + +4. 
**Full cluster scan with auto-fix**: + ``` + /olm:diagnose --cluster --fix + ``` + +## Arguments +- **$1** (operator-name): Name of specific operator to diagnose (optional) + - If not provided, checks all operators (or cluster-wide with `--cluster`) + - Example: "openshift-cert-manager-operator" +- **$2** (namespace): Specific namespace to check (optional) + - If not provided with operator-name, searches all namespaces + - Example: "cert-manager-operator" +- **$3+** (flags): Optional flags + - `--fix`: Attempt to automatically fix detected issues + - Prompts for confirmation before each fix + - Use with caution in production environments + - `--cluster`: Run cluster-wide diagnostics + - Checks catalog sources + - Scans for orphaned CRDs across all namespaces + - Identifies global issues + +## Troubleshooting + +- **Permission denied**: + ```bash + # Check required permissions + oc auth can-i get crd + oc auth can-i get csv --all-namespaces + oc auth can-i patch namespace + ``` + +- **Unable to fix stuck namespace**: + - Some resources may require manual intervention + - Check API service availability: + ```bash + oc get apiservice + ``` + +- **CRDs won't delete**: + ```bash + # Check for remaining CRs + oc get {crd-name} --all-namespaces + + # Check for finalizers + oc get crd {crd-name} -o jsonpath='{.metadata.finalizers}' + ``` + +- **Catalog source issues persist**: + ```bash + # Restart catalog pod + oc delete pod {catalog-pod-name} -n openshift-marketplace + + # Check catalog source definition + oc get catalogsource {catalog-name} -n openshift-marketplace -o yaml + ``` + +## Related Commands + +- `/olm:status <operator-name>` - Check specific operator status +- `/olm:list` - List all operators +- `/olm:uninstall <operator-name>` - Clean uninstall with orphan cleanup +- `/olm:approve <operator-name>` - Approve pending InstallPlans + +## Additional Resources + +- [Troubleshooting Operator Issues](https://docs.redhat.com/en/documentation/openshift_container_platform/4.20/html/operators/administrator-tasks#olm-troubleshooting-operator-issues) +- [Operator
Lifecycle Manager Documentation](https://olm.operatorframework.io/) + + diff --git a/plugins/olm/commands/install.md b/plugins/olm/commands/install.md new file mode 100644 index 0000000..ccc0bcf --- /dev/null +++ b/plugins/olm/commands/install.md @@ -0,0 +1,272 @@ +--- +description: Install a day-2 operator using Operator Lifecycle Manager +argument-hint: <operator-name> [namespace] [channel] [source] [--approval=Automatic|Manual] +--- + +## Name +olm:install + +## Synopsis +``` +/olm:install <operator-name> [namespace] [channel] [source] [--approval=Automatic|Manual] +``` + +## Description +The `olm:install` command installs a day-2 operator in an OpenShift cluster using Operator Lifecycle Manager (OLM). It automates the creation of the required namespace, OperatorGroup, and Subscription resources needed to install an operator. + +This command handles the complete operator installation workflow: +- Creates or verifies the target namespace exists +- Creates an OperatorGroup if needed +- Creates a Subscription to install the operator +- Verifies the installation by checking the operator's CSV (ClusterServiceVersion) status +- Provides detailed feedback on the installation progress + +The command is designed to work with operators from the OperatorHub catalog, including Red Hat certified operators, community operators, and custom catalog sources. + +## Implementation + +The command performs the following steps: + +1. **Parse Arguments**: + - `$1`: Operator name (required) - The name of the operator to install (e.g., "openshift-cert-manager-operator") + - `$2`: Namespace (optional) - Target namespace for the operator. If not provided, defaults to the operator name with any "openshift-" prefix removed (e.g., "openshift-cert-manager-operator" becomes "cert-manager-operator") + - `$3`: Channel (optional) - Subscription channel. If not provided, discovers the default channel from the operator's PackageManifest + - `$4`: Source (optional) - CatalogSource name.
Defaults to "redhat-operators" for Red Hat operators + - `$5+`: Flags (optional): + - `--approval=Automatic|Manual`: InstallPlan approval mode (default: Automatic) + - Automatic: Operator upgrades are automatically installed + - Manual: Operator upgrades require manual approval via `/olm:approve` or `oc patch` + +2. **Prerequisites Check**: + - Verify `oc` CLI is installed: `which oc` + - Verify cluster access: `oc whoami` + - Check if user has cluster-admin or sufficient privileges + - If not installed or not authenticated, provide clear instructions + +3. **Discover Operator Metadata** (if channel or source not provided): + - Search for the operator in available catalogs: + ```bash + oc get packagemanifests -n openshift-marketplace | grep {operator-name} + ``` + - Get the PackageManifest details: + ```bash + oc get packagemanifest {operator-name} -n openshift-marketplace -o json + ``` + - Extract: + - Default channel: `.status.defaultChannel` + - CatalogSource: `.status.catalogSource` + - CatalogSourceNamespace: `.status.catalogSourceNamespace` + - If operator not found, provide error with list of available operators + +4. **Create Namespace**: + - Check if namespace exists: `oc get namespace {namespace} --ignore-not-found` + - If not exists, create it: + ```bash + oc create namespace {namespace} + ``` + - If exists, inform user and continue + +5. **Create OperatorGroup**: + - Check if OperatorGroup exists in the namespace: + ```bash + oc get operatorgroup -n {namespace} --ignore-not-found + ``` + - If no OperatorGroup exists, create one: + ```yaml + apiVersion: operators.coreos.com/v1 + kind: OperatorGroup + metadata: + name: {namespace}-operatorgroup + namespace: {namespace} + spec: + targetNamespaces: + - {namespace} + ``` + - Save to temporary file and apply: + ```bash + oc apply -f /tmp/operatorgroup-{operator-name}.yaml + ``` + - If OperatorGroup already exists, inform user and continue + +6. 
**Create Subscription**: + - Parse approval mode from flags (default: Automatic) + - Create Subscription manifest: + ```yaml + apiVersion: operators.coreos.com/v1alpha1 + kind: Subscription + metadata: + name: {operator-name} + namespace: {namespace} + spec: + channel: {channel} + name: {operator-name} + source: {source} + sourceNamespace: openshift-marketplace + installPlanApproval: {Automatic|Manual} + ``` + - Save to temporary file and apply: + ```bash + oc apply -f /tmp/subscription-{operator-name}.yaml + ``` + - Display the created subscription details + - If approval mode is Manual, display informational message: + ``` + ℹ️ InstallPlan approval set to Manual + You will need to manually approve InstallPlans for this operator. + Use: /olm:approve {operator-name} {namespace} + + Reference: https://docs.redhat.com/en/documentation/openshift_container_platform/4.20/html/operators/administrator-tasks#olm-approving-operator-upgrades_olm-updating-operators + ``` + +7. **Verify Installation**: + - Wait for InstallPlan to be created: + ```bash + oc get installplan -n {namespace} -l operators.coreos.com/operator={operator-name} + ``` + - If approval mode is Manual, check if InstallPlan needs approval: + ```bash + oc get installplan -n {namespace} -o json | jq '.items[] | select(.spec.approved==false)' + ``` + - If Manual and not approved, display message: + ``` + ⏸️ InstallPlan created but requires manual approval + + InstallPlan: {installplan-name} + To approve: /olm:approve {operator-name} {namespace} + Or manually: oc patch installplan {installplan-name} -n {namespace} \ + --type merge --patch '{"spec":{"approved":true}}' + + Waiting for approval... + ``` + - Wait for CSV to be created and reach "Succeeded" phase: + ```bash + oc get csv -n {namespace} -w + ``` + - Use a timeout of 5 minutes for the installation to complete (10 minutes if Manual approval) + - Poll every 10 seconds to check CSV status + - Display progress updates to the user + +8. 
**Display Results**: + - Show the installed operator's CSV name and version + - Show the operator deployment status: + ```bash + oc get deployments -n {namespace} + ``` + - List any pods created by the operator: + ```bash + oc get pods -n {namespace} + ``` + - Display success message with next steps or usage instructions + +9. **Cleanup Temporary Files**: + - Remove temporary YAML files created during installation: + ```bash + rm -f /tmp/operatorgroup-{operator-name}.yaml /tmp/subscription-{operator-name}.yaml + ``` + +## Return Value +- **Success**: Operator installed successfully with details about the CSV, deployments, and pods +- **Error**: Installation failed with specific error message and troubleshooting suggestions +- **Format**: Structured output showing: + - Namespace created/used + - OperatorGroup status + - Subscription created + - CSV status and version + - Deployment and pod status + +## Examples + +1. **Install cert-manager-operator with defaults**: + ``` + /olm:install openshift-cert-manager-operator + ``` + This will: + - Create namespace `cert-manager-operator` + - Discover default channel from PackageManifest + - Use `redhat-operators` catalog source + - Install the operator + +2. **Install cert-manager-operator with custom namespace**: + ``` + /olm:install openshift-cert-manager-operator my-cert-manager + ``` + This will install the operator in the `my-cert-manager` namespace. + +3. **Install with specific channel**: + ``` + /olm:install openshift-cert-manager-operator cert-manager-operator stable-v1 + ``` + This will install from the `stable-v1` channel. + +4. **Install from community catalog**: + ``` + /olm:install prometheus community-operators stable community-operators + ``` + This will install Prometheus from the community-operators catalog. + +5. **Install Red Hat Advanced Cluster Security**: + ``` + /olm:install rhacs-operator rhacs-operator stable + ``` + +6. 
**Install with manual approval mode**: + ``` + /olm:install openshift-cert-manager-operator cert-manager-operator stable-v1 redhat-operators --approval=Manual + ``` + This will install the operator but require manual approval for all upgrades. + +7. **Install with all parameters specified**: + ``` + /olm:install external-secrets-operator eso-operator stable-v0.10 redhat-operators --approval=Automatic + ``` + +## Arguments +- **$1** (operator-name): The name of the operator to install (required) + - Example: "openshift-cert-manager-operator" + - Must match the name in the operator's PackageManifest +- **$2** (namespace): Target namespace for the operator installation (optional) + - Default: `{operator-name}` (operator name without "openshift-" prefix if present) + - Example: "cert-manager-operator" +- **$3** (channel): Subscription channel (optional) + - Default: Auto-discovered from PackageManifest's default channel + - Example: "stable-v1", "tech-preview", "stable" +- **$4** (source): CatalogSource name (optional) + - Default: "redhat-operators" + - Other options: "certified-operators", "community-operators", "redhat-marketplace" +- **$5+** (flags): Optional flags + - `--approval=Automatic|Manual`: InstallPlan approval mode + - **Automatic** (default): Operator upgrades are automatically installed without user intervention + - **Manual**: Operator upgrades require explicit approval. 
Useful for: + - Production environments requiring change control + - Testing upgrades before applying + - Preventing unexpected operator updates + - Reference: https://docs.redhat.com/en/documentation/openshift_container_platform/4.20/html/operators/administrator-tasks#olm-approving-operator-upgrades_olm-updating-operators + +## Notes + +- **Automatic Channel Discovery**: If no channel is specified, the command automatically discovers and uses the operator's default channel from its PackageManifest +- **Namespace Convention**: By default, operators are installed in a namespace named after the operator, with any leading `openshift-` prefix removed (e.g., `openshift-cert-manager-operator` is installed in `cert-manager-operator`) +- **OperatorGroup Scope**: The created OperatorGroup targets only the installation namespace for better isolation +- **InstallPlan Approval**: Set to "Automatic" by default for seamless installation. Can be changed to "Manual" using `--approval=Manual` flag +- **Manual Approval Mode**: When using `--approval=Manual`: + - Initial installation may require manual approval of the InstallPlan + - All future upgrades will require explicit approval via `/olm:approve` command + - Provides better control over operator updates in production environments +- **Verification Timeout**: The command waits up to 5 minutes for the operator to install successfully (10 minutes for manual approval mode) +- **Cleanup**: Temporary YAML files are automatically removed after installation + +## Troubleshooting + +- **Operator not found**: Run `oc get packagemanifests -n openshift-marketplace` to see available operators +- **Permission denied**: Ensure you have cluster-admin privileges or the necessary RBAC permissions +- **Installation timeout**: Check the InstallPlan and CSV status manually: + ```bash + oc get installplan -n {namespace} + oc get csv -n {namespace} + oc describe csv -n {namespace} + ``` +- **Operator pod not starting**: Check pod logs: + ```bash + oc logs -n {namespace} deployment/{operator-deployment} + ``` + diff --git a/plugins/olm/commands/list.md 
b/plugins/olm/commands/list.md new file mode 100644 index 0000000..f93abae --- /dev/null +++ b/plugins/olm/commands/list.md @@ -0,0 +1,174 @@ +--- +description: List installed operators in the cluster +argument-hint: [namespace] [--all-namespaces] +--- + +## Name +olm:list + +## Synopsis +``` +/olm:list [namespace] [--all-namespaces] +``` + +## Description +The `olm:list` command lists all installed operators in an OpenShift cluster, showing their status, version, and namespace. This command provides a quick overview of the operator landscape in your cluster. + +This command helps you: +- Discover what operators are currently installed +- Check operator versions and status at a glance +- Identify operators that may need attention (failed, upgrading, etc.) +- Get a comprehensive view across namespaces + +The command presents information in an easy-to-read table format with key details about each operator's ClusterServiceVersion (CSV) and Subscription. + +## Implementation + +The command performs the following steps: + +1. **Parse Arguments**: + - `$1`: Namespace (optional) - Target namespace to list operators from + - `$2`: Flag (optional): + - `--all-namespaces` or `-A`: List operators across all namespaces (default behavior if no namespace specified) + +2. **Prerequisites Check**: + - Verify `oc` CLI is installed: `which oc` + - Verify cluster access: `oc whoami` + - If not installed or not authenticated, provide clear instructions + +3. **Determine Scope**: + - If namespace is specified: List operators only in that namespace + - If `--all-namespaces` flag or no arguments: List operators cluster-wide + - Default behavior: Show all operators across all namespaces + +4. 
**Fetch Operator Data**: + - Get all ClusterServiceVersions (CSVs): + ```bash + # For specific namespace + oc get csv -n {namespace} -o json + + # For all namespaces + oc get csv --all-namespaces -o json + ``` + - Get all Subscriptions: + ```bash + # For specific namespace + oc get subscription -n {namespace} -o json + + # For all namespaces + oc get subscription --all-namespaces -o json + ``` + +5. **Parse and Correlate Data**: + - For each CSV, extract: + - Name: `.metadata.name` + - Namespace: `.metadata.namespace` + - Display Name: `.spec.displayName` + - Version: `.spec.version` + - Phase/Status: `.status.phase` (e.g., "Succeeded", "Installing", "Failed") + - Install Time: `.metadata.creationTimestamp` + - For each Subscription, extract: + - Operator Name: `.spec.name` + - Channel: `.spec.channel` + - Source: `.spec.source` + - Installed CSV: `.status.installedCSV` + - Current CSV: `.status.currentCSV` + - Correlate Subscriptions with CSVs to show complete operator information + +6. **Format Output as Table**: + Create a formatted table with columns: + ``` + NAMESPACE OPERATOR NAME VERSION STATUS CHANNEL SOURCE + cert-manager-operator cert-manager-operator v1.13.1 Succeeded stable-v1 redhat-operators + external-secrets-operator external-secrets-operator v0.10.5 Succeeded stable-v0.10 redhat-operators + openshift-pipelines openshift-pipelines-operator-rh v1.14.4 Succeeded latest redhat-operators + ``` + +7. **Add Summary Statistics**: + - Total operators installed: X + - By status: + - Succeeded: X + - Installing: X + - Upgrading: X + - Failed: X + - By catalog source: + - redhat-operators: X + - certified-operators: X + - community-operators: X + - custom catalogs: X + +8. **Highlight Issues** (if any): + - List operators with status other than "Succeeded": + ``` + ⚠️ Operators requiring attention: + - namespace/operator-name: Failed (reason: ...) + - namespace/operator-name: Installing (waiting for...) + ``` + +9. 
**Provide Actionable Suggestions**: + - If operators are in "Failed" state, suggest: `/olm:status {operator-name} {namespace}` for details + - If no operators found, suggest: `/olm:search {operator-name}` to find available operators + - If upgrades available, suggest: `/olm:status {operator-name}` to check upgrade options + +## Return Value +- **Success**: Formatted table of installed operators with summary statistics +- **Empty**: No operators found message with suggestion to install operators +- **Error**: Connection or permission error with troubleshooting guidance +- **Format**: + - Table with columns: NAMESPACE, OPERATOR NAME, VERSION, STATUS, CHANNEL, SOURCE + - Summary statistics + - Warnings for operators requiring attention + +## Examples + +1. **List all operators cluster-wide**: + ``` + /olm:list + ``` + +2. **List operators in a specific namespace**: + ``` + /olm:list cert-manager-operator + ``` + +## Arguments +- **$1** (namespace): Target namespace to list operators from (optional) + - If not provided, lists operators from all namespaces + - Example: "cert-manager-operator" +- **$2** (flag): Optional flag (optional) + - `--all-namespaces` or `-A`: Explicitly list all operators cluster-wide + - Default behavior if no namespace is provided + +## Notes + +- **Performance**: For large clusters with many operators, the command may take a few seconds to collect all data +- **Status Values**: Common CSV status values include: + - `Succeeded`: Operator is healthy and running + - `Installing`: Operator is being installed + - `Upgrading`: Operator is being upgraded + - `Failed`: Operator installation or operation failed + - `Replacing`: Old version being replaced + - `Deleting`: Operator is being removed +- **Correlation**: The command correlates Subscriptions with CSVs to provide complete operator information +- **Sorting**: Results are sorted by namespace, then by operator name + +## Troubleshooting + +- **Permission denied**: Ensure you have permissions to 
list CSVs and Subscriptions: + ```bash + oc auth can-i list csv --all-namespaces + oc auth can-i list subscription --all-namespaces + ``` +- **Slow response**: For large clusters, use namespace-specific queries to speed up results +- **Missing operators**: Some operators may not have Subscriptions if installed manually; these will still appear based on CSV presence +- **Version mismatch**: If Subscription's `installedCSV` differs from `currentCSV`, an upgrade may be in progress + +## Related Commands + +- `/olm:status <operator-name> [namespace]` - Get detailed status of a specific operator +- `/olm:install <operator-name>` - Install a new operator +- `/olm:search <query>` - Search for available operators in catalogs + +## Additional Resources +- [Operator Lifecycle Manager Documentation](https://olm.operatorframework.io/) + diff --git a/plugins/olm/commands/search.md b/plugins/olm/commands/search.md new file mode 100644 index 0000000..72773c6 --- /dev/null +++ b/plugins/olm/commands/search.md @@ -0,0 +1,247 @@ +--- +description: Search for available operators in catalog sources +argument-hint: [query] [--catalog <catalog-name>] +--- + +## Name +olm:search + +## Synopsis +``` +/olm:search [query] [--catalog <catalog-name>] +``` + +## Description +The `olm:search` command searches for available operators in the cluster's catalog sources (OperatorHub). It helps you discover operators that can be installed, showing their names, descriptions, versions, channels, and catalog sources. + +This command helps you: +- Find operators by name, description, or keywords +- Discover what operators are available for installation +- View operator details before installing +- Check available versions and channels +- Identify which catalog source contains a specific operator + +The command searches across all available catalog sources (redhat-operators, certified-operators, community-operators, redhat-marketplace, and custom catalogs) and presents results in an easy-to-read format. 
+ +## Implementation + +The command performs the following steps: + +1. **Parse Arguments**: + - `$1`: Query string (optional) - Search term for filtering operators + - If not provided, lists all available operators + - Can be partial name, keyword, or description + - `$2+`: Flags (optional): + - `--catalog <catalog-name>`: Limit search to specific catalog source + - `--exact`: Only show exact name matches + - `--installed`: Show only installed operators (combination with /olm:list) + +2. **Prerequisites Check**: + - Verify `oc` CLI is installed: `which oc` + - Verify cluster access: `oc whoami` + - If not installed or not authenticated, provide clear instructions + +3. **Fetch Catalog Data**: + - Get all PackageManifests from openshift-marketplace: + ```bash + oc get packagemanifests -n openshift-marketplace -o json + ``` + - If `--catalog` flag is specified, filter by catalog source: + ```bash + oc get packagemanifests -n openshift-marketplace -o json | jq '.items[] | select(.status.catalogSource=="{catalog-name}")' + ``` + +4. **Parse PackageManifest Data**: + - For each PackageManifest, extract: + - Name: `.metadata.name` + - Display Name: `.status.channels[0].currentCSVDesc.displayName` + - Description: `.status.channels[0].currentCSVDesc.description` + - Provider: `.status.provider.name` + - Catalog Source: `.status.catalogSource` + - Catalog Namespace: `.status.catalogSourceNamespace` + - Default Channel: `.status.defaultChannel` + - All Channels: `.status.channels[].name` + - Latest Version: `.status.defaultChannel as $default | .status.channels[] | select(.name==$default) | .currentCSVDesc.version` + - Categories: `.status.channels[0].currentCSVDesc.annotations["categories"]` + - Capabilities: `.status.channels[0].currentCSVDesc.annotations["capabilities"]` + +5. 
**Apply Search Filter** (if query provided): + - Case-insensitive search across: + - Operator name (`.metadata.name`) + - Display name (`.status.channels[0].currentCSVDesc.displayName`) + - Description (`.status.channels[0].currentCSVDesc.description`) + - Provider name (`.status.provider.name`) + - Categories + - If `--exact` flag, only match exact operator names + +6. **Sort Results**: + - Primary sort: By catalog source (redhat-operators first, then certified, community, etc.) + - Secondary sort: By operator name alphabetically + +7. **Format Search Results**: + + **A. Summary Header** + ``` + Found X operators matching "{query}" + ``` + + **B. Results List** + For each operator: + ``` + ┌───────────────────────────────────────────────────────────── + │ cert-manager-operator for Red Hat OpenShift + ├───────────────────────────────────────────────────────────── + │ Name: openshift-cert-manager-operator + │ Provider: Red Hat + │ Catalog: redhat-operators + │ Default: stable-v1 + │ Channels: stable-v1, tech-preview-v1.13 + │ Version: v1.13.1 + │ Categories: Security + │ + │ Description: Manages the lifecycle of TLS certificates... + │ + │ Install: /olm:install openshift-cert-manager-operator + └───────────────────────────────────────────────────────────── + ``` + +8. **Group by Catalog** (optional, for better readability): + ``` + ═════════════════════════════════════════════════════════════ + RED HAT OPERATORS (3) + ═════════════════════════════════════════════════════════════ + + [List of operators from redhat-operators] + + ═════════════════════════════════════════════════════════════ + CERTIFIED OPERATORS (1) + ═════════════════════════════════════════════════════════════ + + [List of operators from certified-operators] + + ═════════════════════════════════════════════════════════════ + COMMUNITY OPERATORS (2) + ═════════════════════════════════════════════════════════════ + + [List of operators from community-operators] + ``` + +9. 
**Provide Installation Guidance**: + - For each operator, show ready-to-use install command: + ``` + To install: /olm:install {operator-name} + ``` + - For operators with specific channel recommendations, note them + +10. **Handle No Results**: + - If no operators match the query: + ``` + No operators found matching "{query}" + + Suggestions: + - Try a broader search term + - List all available operators: /olm:search + - Check specific catalog: /olm:search {query} --catalog redhat-operators + ``` + +11. **Show Popular/Recommended Operators** (if no query provided): + - Highlight commonly used operators: + - cert-manager + - external-secrets-operator + - OpenShift Pipelines + - OpenShift GitOps + - Service Mesh + - etc. + +## Return Value +- **Success**: List of matching operators with detailed information +- **No Results**: Message indicating no matches with suggestions +- **Error**: Connection or permission error with troubleshooting guidance +- **Format**: + - Summary of search results + - Detailed operator information cards + - Installation commands for each operator + - Grouped by catalog source + +## Examples + +1. **Search for cert-manager operator**: + ``` + /olm:search cert-manager + ``` + +2. **Search for secrets-related operators**: + ``` + /olm:search secrets + ``` + Output listing multiple operators related to secrets management. + +3. **List all operators** (no query): + ``` + /olm:search + ``` + +4. **Search in specific catalog**: + ``` + /olm:search prometheus --catalog community-operators + ``` + Output showing only Prometheus-related operators from community-operators catalog. + +5. **Exact name match**: + ``` + /olm:search external-secrets-operator --exact + ``` + Output showing only the exact match for external-secrets-operator. + +6. **Search for operators by category** (e.g., security): + ``` + /olm:search security + ``` + Output listing all security-related operators. 
+ +## Arguments +- **$1** (query): Search term to filter operators (optional) + - If not provided, lists all available operators (may be very long) + - Searches across name, display name, description, provider + - Case-insensitive partial matching + - Example: "cert", "secrets", "security", "monitoring" +- **$2+** (flags): Optional flags + - `--catalog <catalog-name>`: Limit search to specific catalog + - Values: "redhat-operators", "certified-operators", "community-operators", "redhat-marketplace", or custom catalog name + - `--exact`: Only show exact name matches (no partial matching) + - `--installed`: Show only operators that are currently installed + + +## Troubleshooting + +- **No operators found**: + - Verify catalog sources are available: + ```bash + oc get catalogsources -n openshift-marketplace + ``` + - Check if catalog sources are healthy: + ```bash + oc get pods -n openshift-marketplace + ``` +- **Slow search**: + - Use more specific search terms + - Search in specific catalog: `--catalog redhat-operators` +- **Incomplete information**: + - Some operators may have limited metadata in their PackageManifest +- **Permission denied**: + - Ensure you can read PackageManifests: + ```bash + oc auth can-i list packagemanifests -n openshift-marketplace + ``` + +## Related Commands + +- `/olm:install <operator-name>` - Install an operator found in search results +- `/olm:list` - List installed operators +- `/olm:status <operator-name>` - Check status of an installed operator + +## Additional Resources + +- [OperatorHub.io](https://operatorhub.io/) - Browse operators online +- [Operator Lifecycle Manager Documentation](https://olm.operatorframework.io/) + diff --git a/plugins/olm/commands/status.md b/plugins/olm/commands/status.md new file mode 100644 index 0000000..603ceb0 --- /dev/null +++ b/plugins/olm/commands/status.md @@ -0,0 +1,351 @@ +--- +description: Get detailed status and health information for an operator +argument-hint: <operator-name> [namespace] +--- + +## Name +olm:status + +## Synopsis +``` +/olm:status <operator-name> 
[namespace] +``` + +## Description +The `olm:status` command provides comprehensive health and status information for a specific operator in an OpenShift cluster. It displays detailed information about the operator's CSV, Subscription, InstallPlan, deployments, and pods to help diagnose issues and verify proper operation. + +This command helps you: +- Check if an operator is running correctly +- Diagnose installation or upgrade problems +- View operator version and available updates +- Inspect operator deployments and pods +- Review recent events and conditions +- Identify resource issues or configuration problems + +## Implementation + +The command performs the following steps: + +1. **Parse Arguments**: + - `$1`: Operator name (required) - Name of the operator to inspect + - `$2`: Namespace (optional) - Namespace where operator is installed + - If not provided, searches for the operator across all namespaces + - If multiple instances found, prompts user to specify namespace + +2. **Prerequisites Check**: + - Verify `oc` CLI is installed: `which oc` + - Verify cluster access: `oc whoami` + - If not installed or not authenticated, provide clear instructions + +3. **Locate Operator**: + - If namespace provided, verify operator exists in that namespace: + ```bash + oc get subscription {operator-name} -n {namespace} --ignore-not-found + ``` + - If no namespace provided, search across all namespaces: + ```bash + oc get subscription --all-namespaces -o json | jq -r '.items[] | select(.spec.name=="{operator-name}") | .metadata.namespace' + ``` + - If not found, display error with suggestions + - If multiple instances found, list them and ask user to specify namespace + +4. 
**Gather Subscription Information**: + - Get Subscription details: + ```bash + oc get subscription {operator-name} -n {namespace} -o json + ``` + - Extract: + - Channel: `.spec.channel` + - Install Plan Approval: `.spec.installPlanApproval` + - Source: `.spec.source` + - Source Namespace: `.spec.sourceNamespace` + - Installed CSV: `.status.installedCSV` + - Current CSV: `.status.currentCSV` + - State: `.status.state` + - Conditions: `.status.conditions[]` + +5. **Gather CSV Information**: + - Get CSV details: + ```bash + oc get csv {csv-name} -n {namespace} -o json + ``` + - Extract: + - Display Name: `.spec.displayName` + - Version: `.spec.version` + - Phase: `.status.phase` + - Message: `.status.message` + - Reason: `.status.reason` + - Creation Time: `.metadata.creationTimestamp` + - Conditions: `.status.conditions[]` + - Requirements: `.status.requirementStatus[]` + +6. **Gather InstallPlan Information**: + - Get related InstallPlans: + ```bash + oc get installplan -n {namespace} -o json + ``` + - Find InstallPlans related to this operator by checking `.spec.clusterServiceVersionNames` + - Extract: + - Name: `.metadata.name` + - Phase: `.status.phase` (e.g., "Complete", "Installing", "Failed") + - Approved: `.spec.approved` + - Bundle Resources: `.status.bundleLookups[]` + +7. **Gather Deployment Information**: + - Get deployments owned by the CSV: + ```bash + oc get deployments -n {namespace} -o json + ``` + - Filter deployments with owner reference to the CSV + - For each deployment, extract: + - Name: `.metadata.name` + - Ready Replicas: `.status.readyReplicas` / `.status.replicas` + - Available: `.status.availableReplicas` + - Conditions: `.status.conditions[]` + +8. 
**Gather Pod Information**: + - Get pods managed by operator deployments: + ```bash + oc get pods -n {namespace} -l app={operator-label} -o json + ``` + - For each pod, extract: + - Name: `.metadata.name` + - Status: `.status.phase` + - Ready: Count of ready containers vs total + - Restarts: Sum of `.status.containerStatuses[].restartCount` + - Age: Calculate from `.metadata.creationTimestamp` + +9. **Check for Recent Events**: + - Get events related to the operator: + ```bash + oc get events -n {namespace} --field-selector involvedObject.name={csv-name} --sort-by='.lastTimestamp' + ``` + - Show last 5-10 events, especially warnings and errors + +10. **Check for Available Updates**: + - Get PackageManifest to check for newer versions: + ```bash + oc get packagemanifest {operator-name} -n openshift-marketplace -o json + ``` + - Extract current channel information: + - Current channel from Subscription: `.spec.channel` + - Latest version in current channel + - Available channels + - Compare installed CSV version with latest available version + - Check for pending InstallPlans: + ```bash + oc get installplan -n {namespace} -o json | jq '.items[] | select(.spec.approved==false)' + ``` + - Determine if manual approval is required: + ```bash + oc get subscription {operator-name} -n {namespace} -o jsonpath='{.spec.installPlanApproval}' + ``` + - Reference: https://docs.redhat.com/en/documentation/openshift_container_platform/4.20/html/operators/administrator-tasks#olm-approving-operator-upgrades_olm-updating-operators + +11. **Format Comprehensive Report**: + Create a structured report with sections: + + **A. Overview** + ``` + Operator: {display-name} + Name: {operator-name} + Namespace: {namespace} + Version: {version} + Status: {phase} + ``` + + **B. 
Subscription** + ``` + Channel: {channel} + Source: {source} + Install Plan Approval: {approval-mode} (Automatic|Manual) + State: {state} + Installed CSV: {installed-csv-name} + Current CSV: {current-csv-name} + ``` + + **C. ClusterServiceVersion (CSV)** + ``` + Name: {csv-name} + Phase: {phase} + Message: {message} + Requirements: [list requirements status] + ``` + + **D. InstallPlan** + ``` + Name: {installplan-name} + Phase: {phase} (Complete|Installing|RequiresApproval|Failed) + Approved: {true/false} + + [If Phase=RequiresApproval and Approved=false:] + ⚠️ Manual approval required for installation/upgrade + To approve: /olm:approve {operator-name} {namespace} + Or manually: oc patch installplan {installplan-name} -n {namespace} \ + --type merge --patch '{"spec":{"approved":true}}' + ``` + + **E. Deployments** + ``` + NAME READY AVAILABLE AGE + cert-manager 1/1 1 5d + cert-manager-webhook 1/1 1 5d + ``` + + **F. Pods** + ``` + NAME STATUS READY RESTARTS AGE + cert-manager-7d4f8f8b4-abcde Running 1/1 0 5d + cert-manager-webhook-6b7c9d5f-fghij Running 1/1 0 5d + ``` + + **G. Recent Events** (if any warnings/errors) + ``` + 5m Warning InstallPlanFailed Failed to install... + 2m Normal InstallSucceeded Successfully installed + ``` + + **H. Update Information** + ``` + Current Version: {current-version} + Latest Available: {latest-version} (in channel: {channel}) + Update Status: [Up to date | Update available | Unknown] + + Available Channels: + - stable-v1 (latest: v1.13.1) + - tech-preview-v1.14 (latest: v1.14.0) + + [If update available in current channel:] + 📦 Update available: {current-version} → {latest-version} + To update: /olm:upgrade {operator-name} {namespace} + + [If newer version in different channel:] + 💡 Newer version available in channel '{new-channel}': {newer-version} + To switch channels: /olm:upgrade {operator-name} {namespace} --channel={new-channel} + ``` + + **I. 
Health Summary** + ``` + ✅ Operator is healthy and running + ⚠️ Operator has warnings (see events) + ❌ Operator is not healthy (see details) + 🔄 Operator is upgrading (Current: {old-version} → Target: {new-version}) + ⏸️ Operator upgrade pending manual approval + ``` + +12. **Provide Actionable Recommendations**: + - If operator is failed: + ``` + ❌ Operator failed: {reason} + + Troubleshooting steps: + 1. Check operator logs: oc logs -n {namespace} deployment/{operator-deployment} + 2. Check events: oc get events -n {namespace} --sort-by='.lastTimestamp' + 3. Check CSV conditions: oc describe csv {csv-name} -n {namespace} + 4. Run diagnostics: /olm:diagnose {operator-name} {namespace} + ``` + - If upgrade available: + ``` + 📦 Update available: {current} → {latest} + To upgrade: /olm:upgrade {operator-name} {namespace} + ``` + - If pods are crashing: + ``` + ⚠️ Pods are crashing (restarts: {count}) + Check logs: oc logs -n {namespace} {pod-name} + Previous logs: oc logs -n {namespace} {pod-name} --previous + ``` + - If InstallPlan requires approval: + ``` + ⏸️ InstallPlan requires manual approval + + InstallPlan: {installplan-name} + Version: {target-version} + + To approve: /olm:approve {operator-name} {namespace} + Or manually: oc patch installplan {installplan-name} -n {namespace} \ + --type merge --patch '{"spec":{"approved":true}}' + + To switch to automatic approvals: + oc patch subscription {operator-name} -n {namespace} \ + --type merge --patch '{"spec":{"installPlanApproval":"Automatic"}}' + ``` + - If operator is upgrading: + ``` + 🔄 Operator upgrade in progress: {old-version} → {new-version} + Monitor progress: watch oc get csv,installplan -n {namespace} + ``` + +## Return Value +- **Success**: Comprehensive status report with all operator details +- **Not Found**: Error message with suggestions to list operators or check spelling +- **Multiple Instances**: List of namespaces where operator is installed +- **Error**: Connection or permission error with 
troubleshooting guidance +- **Format**: Multi-section report with: + - Overview + - Subscription details + - CSV status + - InstallPlan status + - Deployment status + - Pod status + - Recent events + - Health summary + - Recommendations + +## Examples + +1. **Check status of cert-manager operator**: + ``` + /olm:status openshift-cert-manager-operator + ``` + +2. **Check status with specific namespace**: + ``` + /olm:status external-secrets-operator external-secrets-operator + ``` + +## Arguments +- **$1** (operator-name): Name of the operator to inspect (required) + - Example: "openshift-cert-manager-operator" + - Must match the operator's Subscription name +- **$2** (namespace): Namespace where operator is installed (optional) + - If not provided, searches all namespaces + - Example: "cert-manager-operator" + +## Notes + +- **Comprehensive View**: This command aggregates data from multiple resources (Subscription, CSV, InstallPlan, Deployments, Pods) for a complete picture +- **Permissions**: Requires read permissions for subscriptions, csvs, installplans, deployments, pods, and events in the target namespace +- **Performance**: May take a few seconds to gather all information for large operators with many resources +- **Auto-Discovery**: If namespace is not specified, the command automatically finds the operator across all namespaces +- **Health Checks**: The command evaluates multiple factors to determine overall operator health +- **Troubleshooting**: Provides context-aware recommendations based on detected issues + +## Troubleshooting + +- **Operator not found**: + - Verify operator name: `oc get subscriptions --all-namespaces | grep {operator-name}` + - List all operators: `/olm:list` +- **Multiple instances found**: + - Specify namespace explicitly: `/olm:status {operator-name} {namespace}` +- **Permission denied**: + - Ensure you have read permissions in the target namespace + - Check: `oc auth can-i get csv -n {namespace}` +- **Incomplete information**: + 
- Some operators may not have all resources (e.g., manually installed CSVs without Subscriptions) + +## Related Commands + +- `/olm:list` - List all installed operators +- `/olm:install <operator-name>` - Install a new operator +- `/olm:uninstall <operator-name>` - Uninstall an operator +- `/olm:upgrade <operator-name>` - Upgrade an operator +- `/olm:approve <operator-name>` - Approve pending InstallPlans +- `/olm:diagnose <operator-name>` - Diagnose and fix operator issues + +## Additional Resources + +- [Viewing Operator Status](https://docs.redhat.com/en/documentation/openshift_container_platform/4.20/html/operators/administrator-tasks#olm-status-viewing-operator-status) +- [Updating Installed Operators](https://docs.redhat.com/en/documentation/openshift_container_platform/4.20/html/operators/administrator-tasks#olm-updating-operators) +- [Troubleshooting Operator Issues](https://docs.redhat.com/en/documentation/openshift_container_platform/4.20/html/operators/administrator-tasks#olm-troubleshooting-operator-issues) + diff --git a/plugins/olm/commands/uninstall.md b/plugins/olm/commands/uninstall.md new file mode 100644 index 0000000..36c3ec1 --- /dev/null +++ b/plugins/olm/commands/uninstall.md @@ -0,0 +1,392 @@ +--- +description: Uninstall a day-2 operator and optionally remove its resources +argument-hint: <operator-name> [namespace] [--remove-crds] [--remove-namespace] +--- + +## Name +olm:uninstall + +## Synopsis +``` +/olm:uninstall <operator-name> [namespace] [--remove-crds] [--remove-namespace] +``` + +## Description +The `olm:uninstall` command uninstalls a day-2 operator from an OpenShift cluster by removing its Subscription, ClusterServiceVersion (CSV), and optionally its Custom Resource Definitions (CRDs) and namespace. 
+ +This command provides a comprehensive uninstallation workflow: +- Removes the operator's Subscription +- Deletes the ClusterServiceVersion (CSV) +- Optionally removes operator-managed deployments +- Optionally deletes Custom Resource Definitions (CRDs) +- Optionally removes the operator's namespace +- Provides detailed feedback on each step + +The command is designed to safely clean up operators installed via OLM, with optional flags for thorough cleanup of all operator-related resources. + +## Implementation + +The command performs the following steps: + +1. **Parse Arguments**: + - `$1`: Operator name (required) - The name of the operator to uninstall + - `$2`: Namespace (optional) - The namespace where operator is installed. If not provided, defaults to the operator name without the "openshift-" prefix (e.g., `cert-manager-operator` for `openshift-cert-manager-operator`) + - `$3+`: Flags (optional): + - `--remove-crds`: Remove Custom Resource Definitions after uninstalling + - `--remove-namespace`: Remove the operator's namespace after cleanup + - `--force`: Skip confirmation prompts + +2. **Prerequisites Check**: + - Verify `oc` CLI is installed: `which oc` + - Verify cluster access: `oc whoami` + - Check if user has cluster-admin or sufficient privileges + +3. **Verify Operator Installation**: + - Check if namespace exists: + ```bash + oc get namespace {namespace} --ignore-not-found + ``` + - Check if subscription exists: + ```bash + oc get subscription {operator-name} -n {namespace} --ignore-not-found + ``` + - If not found, display error: "Operator {operator-name} is not installed in namespace {namespace}" + - List what will be uninstalled + +4. **Display Uninstallation Plan**: + - Show operator details: + ```bash + oc get subscription {operator-name} -n {namespace} -o yaml + oc get csv -n {namespace} + ``` + - Display what will be removed: + - Subscription name and namespace + - CSV name and version + - Deployments (if any) + - CRDs (if `--remove-crds` flag is set) + - Namespace (if `--remove-namespace` flag is set) + +5. 
**Request User Confirmation** (unless `--force` flag is set): + - Display warning: + ``` + WARNING: You are about to uninstall {operator-name} from namespace {namespace}. + This will remove: + - Subscription: {subscription-name} + - ClusterServiceVersion: {csv-name} + - Operator deployments + [- Custom Resource Definitions (if --remove-crds is set)] + [- Namespace {namespace} (if --remove-namespace is set)] + + Are you sure you want to continue? (yes/no) + ``` + - Wait for user confirmation + - If user says no, abort operation + +6. **Delete Subscription**: + - Remove the operator's subscription: + ```bash + oc delete subscription {operator-name} -n {namespace} + ``` + - Verify deletion: + ```bash + oc get subscription {operator-name} -n {namespace} --ignore-not-found + ``` + - Display result + +7. **Delete ClusterServiceVersion (CSV)**: + - Get the CSV name (kubectl JSONPath filters do not support `contains`; list the CSVs in the namespace and pick the one shown in the uninstallation plan from step 4): + ```bash + oc get csv -n {namespace} -o jsonpath='{.items[*].metadata.name}' + ``` + - Delete the CSV: + ```bash + oc delete csv {csv-name} -n {namespace} + ``` + - This will automatically remove operator deployments + - Verify CSV is deleted: + ```bash + oc get csv -n {namespace} --ignore-not-found + ``` + +8. **Remove Operator Deployments** (if still present): + - List deployments created by the operator: + ```bash + oc get deployments -n {namespace} + ``` + - For operators like cert-manager with labeled resources: + ```bash + oc delete deployment -n {namespace} -l app.kubernetes.io/instance={operator-base-name} + ``` + - Verify deployments are deleted: + ```bash + oc get deployments -n {namespace} + ``` + +8.5. 
**Check for Orphaned Custom Resources** (before removing CRDs): + - Get list of CRDs managed by the operator from CSV (gather this before the CSV is deleted in step 7; the CSV is no longer available afterwards): + ```bash + oc get csv -n {namespace} -o jsonpath='{.items[0].spec.customresourcedefinitions.owned[*].name}' + ``` + - For each CRD, search for CR instances across all namespaces: + ```bash + oc get {crd-name} --all-namespaces --ignore-not-found + ``` + - If CRs exist, list them with details: + ``` + WARNING: Found custom resources that may prevent clean uninstallation: + - namespace-1/{resource-name} (kind: {kind}) + - namespace-2/{resource-name} (kind: {kind}) + + These resources should be deleted before uninstalling the operator. + Do you want to delete these custom resources? (yes/no) + ``` + - If user confirms, delete each CR: + ```bash + oc delete {crd-name} {resource-name} -n {namespace} + ``` + - This prevents namespace from getting stuck in Terminating state + - Reference: https://docs.redhat.com/en/documentation/openshift_container_platform/4.20/html/operators/administrator-tasks#olm-reinstalling-operators-after-failed-uninstallation_olm-troubleshooting-operator-issues + +9. **Remove Custom Resource Definitions** (if `--remove-crds` flag is set): + - **WARNING**: Display critical warning to user: + ``` + WARNING: Removing CRDs will delete ALL custom resources of these types across the entire cluster! + This action is irreversible and will affect all namespaces. + + Are you absolutely sure you want to remove CRDs? (yes/no) + ``` + - If user confirms, proceed with CRD removal + - Get list of CRDs owned by the operator (reuse the list gathered earlier if the CSV has already been deleted): + ```bash + oc get csv {csv-name} -n {namespace} -o jsonpath='{.spec.customresourcedefinitions.owned[*].name}' + ``` + - For each CRD, check if custom resources exist: + ```bash + oc get {crd-name} --all-namespaces --ignore-not-found + ``` + - Display warning if custom resources exist + - Delete CRDs: + ```bash + oc delete crd {crd-name} + ``` + +10. 
**Remove Namespace** (if `--remove-namespace` flag is set): + - **WARNING**: Display warning: + ``` + WARNING: Removing namespace {namespace} will delete all resources in this namespace! + + Are you sure you want to remove namespace {namespace}? (yes/no) + ``` + - If user confirms: + ```bash + oc delete namespace {namespace} + ``` + - Monitor namespace deletion with timeout: + ```bash + oc wait --for=delete namespace/{namespace} --timeout=120s + ``` + - If namespace gets stuck in "Terminating" state after 120 seconds: + - Check for resources preventing deletion: + ```bash + oc api-resources --verbs=list --namespaced -o name | \ + xargs -n 1 oc get --show-kind --ignore-not-found -n {namespace} + ``` + - Check for finalizers on the namespace: + ```bash + oc get namespace {namespace} -o jsonpath='{.metadata.finalizers}' + ``` + - Display helpful error message: + ``` + ERROR: Namespace {namespace} is stuck in Terminating state. + + Possible causes: + - Resources with finalizers preventing deletion + - API services that are unavailable + - Custom resources that cannot be deleted + + To diagnose and fix, run: /olm:diagnose {operator-name} {namespace} + + Manual troubleshooting: + 1. Check remaining resources: + oc api-resources --verbs=list --namespaced -o name | \ + xargs -n 1 oc get --show-kind --ignore-not-found -n {namespace} + + 2. Check namespace finalizers: + oc get namespace {namespace} -o yaml | grep -A5 finalizers + + WARNING: Do NOT force-delete the namespace as it can lead to unstable cluster behavior. + See: https://access.redhat.com/solutions/4165791 + ``` + - Exit with error code + - Note: OperatorGroup will be automatically deleted with the namespace + +11. 
**Post-Uninstall Verification**: + - Verify all resources are cleaned up: + ```bash + oc get subscription,csv,installplan -n {namespace} --ignore-not-found + ``` + - Check if any CRDs remain (if they were supposed to be deleted): + ```bash + oc get crd | grep {operator-base-name} + ``` + - If uninstalling without `--remove-namespace`, check namespace is clean: + ```bash + oc get all -n {namespace} + ``` + - Display any remaining resources with suggestions for cleanup + +12. **Display Uninstallation Summary**: + - Show what was successfully removed: + ``` + ✓ Uninstallation Summary: + ✓ Subscription '{operator-name}' deleted + ✓ CSV '{csv-name}' deleted + ✓ Operator deployments removed + [✓ X custom resources deleted] + [✓ Y CRDs removed] + [✓ Namespace '{namespace}' deleted] + ``` + - If CRDs or namespace were NOT removed, provide instructions: + ``` + Note: The following resources were NOT removed: + - Custom Resource Definitions (use --remove-crds to remove) + - Namespace {namespace} (use --remove-namespace to remove) + + To completely remove all operator resources, run: + /olm:uninstall {operator-name} {namespace} --remove-crds --remove-namespace + ``` + - **Important warning about reinstallation**: + ``` + IMPORTANT: Before reinstalling this operator, verify all resources are cleaned: + + oc get subscription,csv,installplan -n {namespace} + oc get crd | grep {operator-base-name} + + Failure to completely uninstall may cause reinstallation issues. 
+ See: https://docs.redhat.com/en/documentation/openshift_container_platform/4.20/html/operators/administrator-tasks#olm-reinstalling-operators-after-failed-uninstallation_olm-troubleshooting-operator-issues + ``` + +## Return Value +- **Success**: Operator uninstalled successfully with summary of removed resources +- **Partial Success**: Some resources removed with warnings about remaining resources +- **Error**: Uninstallation failed with specific error message +- **Format**: Structured output showing: + - Subscription deletion status + - CSV deletion status + - Deployment removal status + - CRD removal status (if applicable) + - Namespace deletion status (if applicable) + +## Examples + +1. **Uninstall cert-manager-operator (basic)**: + ``` + /olm:uninstall openshift-cert-manager-operator + ``` + +2. **Uninstall with custom namespace**: + ``` + /olm:uninstall openshift-cert-manager-operator my-cert-manager + ``` + +3. **Complete cleanup including namespace**: + ``` + /olm:uninstall openshift-cert-manager-operator cert-manager-operator --remove-crds --remove-namespace + ``` + This performs a complete cleanup of all operator-related resources. + +4. **Force uninstall without prompts**: + ``` + /olm:uninstall openshift-cert-manager-operator cert-manager-operator --force + ``` + Skips all confirmation prompts (use with caution!). 
+ +## Arguments +- **$1** (operator-name): The name of the operator to uninstall (required) + - Example: "openshift-cert-manager-operator" + - Must match the Subscription name +- **$2** (namespace): The namespace where operator is installed (optional) + - Default: `{operator-name}` (operator name without "openshift-" prefix) + - Example: "cert-manager-operator" +- **$3+** (flags): Optional flags (can combine multiple): + - `--remove-crds`: Remove Custom Resource Definitions (WARNING: affects entire cluster) + - `--remove-namespace`: Remove the operator's namespace and all its resources + - `--force`: Skip all confirmation prompts (use with caution) + +## Safety Features + +1. **Multiple Confirmations**: Separate confirmations for CRD and namespace removal +2. **Detailed Warnings**: Clear warnings about the scope of deletions +3. **Verification Steps**: Checks that resources exist before attempting deletion +4. **Summary Report**: Detailed summary of what was and wasn't removed +5. **Graceful Failures**: Continues with remaining steps if individual deletions fail + +## Troubleshooting + +- **Subscription not found**: Verify the operator name and namespace: + ```bash + oc get subscriptions --all-namespaces | grep {operator-name} + ``` +- **CSV won't delete**: Check for finalizers: + ```bash + oc get csv {csv-name} -n {namespace} -o yaml | grep finalizers + ``` + If finalizers are present, they may be waiting for resources to be cleaned up. Check operator logs and events. + +- **Namespace stuck in Terminating**: This is a common issue after operator uninstallation. + ```bash + # Find remaining resources + oc api-resources --verbs=list --namespaced -o name | \ + xargs -n 1 oc get --show-kind --ignore-not-found -n {namespace} + + # Check namespace finalizers + oc get namespace {namespace} -o yaml | grep -A5 finalizers + ``` + **IMPORTANT**: Do not force-delete the namespace. This can cause cluster instability. 
+ Instead, use `/olm:diagnose {operator-name} {namespace}` to diagnose and fix the issue. + +- **CRDs won't delete**: Check for remaining custom resources: + ```bash + oc get {crd-name} --all-namespaces + ``` + CRD deletion does not complete until all CR instances are removed, and CRs with finalizers can block it. Delete all CRs first. + +- **Custom resources won't delete**: Some CRs may have finalizers preventing deletion: + ```bash + oc get {crd-name} {resource-name} -n {namespace} -o yaml | grep finalizers + ``` + The operator controller (if still running) should remove finalizers. If operator is already deleted, you may need to manually patch the CR to remove finalizers (use with extreme caution). + +- **Permission denied**: Ensure you have cluster-admin privileges for CRD deletion: + ```bash + oc auth can-i delete crd + ``` + +- **Reinstallation fails after uninstall**: This usually means cleanup was incomplete. + Run these checks before reinstalling: + ```bash + # Check for remaining subscriptions/CSVs + oc get subscription,csv -n {namespace} + + # Check for remaining CRDs + oc get crd | grep {operator-base-name} + + # Check if namespace is clean or stuck + oc get namespace {namespace} + ``` + See: https://docs.redhat.com/en/documentation/openshift_container_platform/4.20/html/operators/administrator-tasks#olm-reinstalling-operators-after-failed-uninstallation_olm-troubleshooting-operator-issues + +## Related Commands + +- `/olm:install` - Install a day-2 operator +- `/olm:list` - List installed operators +- `/olm:status` - Check operator status before uninstalling +- `/olm:diagnose` - Diagnose and fix uninstallation issues +- `/olm:upgrade` - Upgrade an operator + +## Additional Resources + +- [Red Hat OpenShift: Deleting Operators from a cluster](https://docs.redhat.com/en/documentation/openshift_container_platform/4.20/html/operators/administrator-tasks#olm-deleting-operators-from-a-cluster) +- [Red Hat OpenShift: Reinstalling Operators after failed 
uninstallation](https://docs.redhat.com/en/documentation/openshift_container_platform/4.20/html/operators/administrator-tasks#olm-reinstalling-operators-after-failed-uninstallation_olm-troubleshooting-operator-issues) +- [Operator Lifecycle Manager Documentation](https://olm.operatorframework.io/) + diff --git a/plugins/olm/commands/upgrade.md b/plugins/olm/commands/upgrade.md new file mode 100644 index 0000000..75434f6 --- /dev/null +++ b/plugins/olm/commands/upgrade.md @@ -0,0 +1,349 @@ +--- +description: Update an operator to the latest version or switch channels +argument-hint: <operator-name> [namespace] [--channel=<channel-name>] [--approve] +--- + +## Name +olm:upgrade + +## Synopsis +``` +/olm:upgrade <operator-name> [namespace] [--channel=<channel-name>] [--approve] +``` + +## Description +The `olm:upgrade` command updates an installed operator to the latest version in its current channel or switches to a different channel. It can also approve pending InstallPlans for operators with manual approval mode. + +This command helps you: +- Update operators to the latest version in their channel +- Switch operators to different channels (e.g., stable to tech-preview) +- Approve pending upgrade InstallPlans for manual approval mode +- Monitor upgrade progress +- Rollback on failure (if possible via OLM) + +## Implementation + +The command performs the following steps: + +1. **Parse Arguments**: + - `$1`: Operator name (required) - Name of the operator to upgrade + - `$2`: Namespace (optional) - Namespace where operator is installed + - If not provided, searches for the operator across all namespaces + - `$3+`: Flags (optional): + - `--channel=<channel-name>`: Switch to a different channel + - `--approve`: Automatically approve pending InstallPlan (for manual approval mode) + +2. **Prerequisites Check**: + - Verify `oc` CLI is installed: `which oc` + - Verify cluster access: `oc whoami` + - Check if user has sufficient privileges + +3. 
**Locate Operator**: + - If namespace provided, verify operator exists: + ```bash + oc get subscription {operator-name} -n {namespace} --ignore-not-found + ``` + - If no namespace provided, search across all namespaces: + ```bash + oc get subscription --all-namespaces -o json | jq -r '.items[] | select(.spec.name=="{operator-name}") | .metadata.namespace' + ``` + - If not found, display error with suggestions + - If multiple instances found, prompt user to specify namespace + +4. **Get Current State**: + - Get current Subscription: + ```bash + oc get subscription {operator-name} -n {namespace} -o json + ``` + - Extract: + - Current channel: `.spec.channel` + - Install plan approval: `.spec.installPlanApproval` + - Installed CSV: `.status.installedCSV` + - Current CSV: `.status.currentCSV` + - Get current CSV version: + ```bash + oc get csv {installed-csv} -n {namespace} -o jsonpath='{.spec.version}' + ``` + +5. **Check for Available Updates**: + - Get PackageManifest: + ```bash + oc get packagemanifest {operator-name} -n openshift-marketplace -o json + ``` + - Extract available channels and their latest versions + - If `--channel` flag is specified, verify channel exists + - If no channel flag, check for updates in current channel + - Compare current version with latest available version + - Reference: https://docs.redhat.com/en/documentation/openshift_container_platform/4.20/html/operators/administrator-tasks#olm-updating-operators + +6. **Display Upgrade Plan**: + ``` + Operator Upgrade Plan: + + Operator: {display-name} + Namespace: {namespace} + Current Version: {current-version} + Current Channel: {current-channel} + + [If switching channels:] + Target Channel: {new-channel} + Target Version: {new-version} + + [If upgrading in same channel:] + Latest Version: {latest-version} (in channel: {current-channel}) + + Approval Mode: {Automatic|Manual} + ``` + +7. 
**Check for Pending InstallPlans** (for manual approval mode): + - Get pending InstallPlans: + ```bash + oc get installplan -n {namespace} -o json | jq '.items[] | select(.spec.approved==false)' + ``` + - If pending InstallPlan exists and `--approve` flag is set: + - Display InstallPlan details + - Approve the InstallPlan (skip to step 9) + - If pending InstallPlan exists and no `--approve` flag: + ``` + ⏸️ Pending InstallPlan found (requires manual approval) + + InstallPlan: {installplan-name} + Target Version: {target-version} + + To approve: /olm:upgrade {operator-name} {namespace} --approve + Or use: /olm:approve {operator-name} {namespace} + ``` + - Exit, waiting for user to approve + +8. **Perform Channel Switch** (if `--channel` flag provided): + - Confirm with user: + ``` + WARNING: Switching channels may upgrade or downgrade the operator. + + Current: {current-channel} ({current-version}) + Target: {new-channel} ({target-version}) + + Continue? (yes/no) + ``` + - Update Subscription to new channel: + ```bash + oc patch subscription {operator-name} -n {namespace} \ + --type merge --patch '{"spec":{"channel":"{new-channel}"}}' + ``` + - Display confirmation: + ``` + ✓ Subscription updated to channel: {new-channel} + ``` + +9. **Approve Pending InstallPlan** (if `--approve` flag or automatic approval): + - If approval mode is Manual and `--approve` flag is set: + ```bash + oc patch installplan {installplan-name} -n {namespace} \ + --type merge --patch '{"spec":{"approved":true}}' + ``` + - Display approval confirmation: + ``` + ✓ InstallPlan approved: {installplan-name} + ``` + +10. **Monitor Upgrade Progress**: + - Wait for new InstallPlan to be created (if switching channels): + ```bash + oc get installplan -n {namespace} -w --request-timeout=60s + ``` + - Wait for new CSV to reach "Succeeded" phase: + ```bash + oc get csv -n {namespace} -w --request-timeout=300s + ``` + - Display progress updates: + ``` + 🔄 Upgrade in progress... 
+ ⏳ Waiting for InstallPlan to complete... + ⏳ New CSV installing: {new-csv-name} + ⏳ Old CSV replacing: {old-csv-name} + ``` + - Poll every 10 seconds to check status + - Timeout: 10 minutes for upgrade to complete + +11. **Verify Upgrade Success**: + - Check new CSV status: + ```bash + oc get csv -n {namespace} -o json + ``` + - Verify new CSV phase is "Succeeded" + - Get new version: + ```bash + oc get csv {new-csv-name} -n {namespace} -o jsonpath='{.spec.version}' + ``` + - Check deployments are healthy: + ```bash + oc get deployments -n {namespace} + ``` + - Check pods are running: + ```bash + oc get pods -n {namespace} + ``` + +12. **Display Upgrade Summary**: + ``` + ✓ Operator Upgrade Complete! + + Operator: {display-name} + Namespace: {namespace} + Previous Version: {old-version} + Current Version: {new-version} + Channel: {channel} + + Deployment Status: + - {deployment-1}: 1/1 replicas ready + - {deployment-2}: 1/1 replicas ready + + To check status: /olm:status {operator-name} {namespace} + ``` + +13. **Handle Upgrade Failures**: + - If upgrade fails or times out: + ``` + ❌ Operator upgrade failed + + Current State: + - CSV: {csv-name} (Phase: {phase}) + - Message: {error-message} + + Troubleshooting steps: + 1. Check CSV status: oc describe csv {csv-name} -n {namespace} + 2. Check events: oc get events -n {namespace} --sort-by='.lastTimestamp' + 3. Check InstallPlan: oc get installplan -n {namespace} + 4. 
Run diagnostics: /olm:diagnose {operator-name} {namespace} + + To rollback (if OLM supports): + oc patch subscription {operator-name} -n {namespace} \ + --type merge --patch '{"spec":{"channel":"{old-channel}"}}' + ``` + +## Return Value +- **Success**: Operator upgraded successfully with new version details +- **Pending Approval**: Upgrade waiting for manual approval with instructions +- **No Update Available**: Operator is already at the latest version +- **Error**: Upgrade failed with specific error message and troubleshooting guidance +- **Format**: Structured output showing: + - Previous and current versions + - Channel information + - Deployment and pod status + - Next steps or related commands + +## Examples + +1. **Check for and install updates in current channel**: + ``` + /olm:upgrade openshift-cert-manager-operator + ``` + +2. **Upgrade with specific namespace**: + ``` + /olm:upgrade external-secrets-operator eso-operator + ``` + +3. **Switch to a different channel**: + ``` + /olm:upgrade openshift-cert-manager-operator cert-manager-operator --channel=tech-preview-v1.14 + ``` + This switches from stable-v1 to tech-preview-v1.14 channel. + +4. **Approve pending upgrade (manual approval mode)**: + ``` + /olm:upgrade openshift-cert-manager-operator --approve + ``` + +5. 
**Switch channel and approve in one command**: + ``` + /olm:upgrade prometheus prometheus-operator --channel=beta --approve + ``` + +## Arguments +- **$1** (operator-name): Name of the operator to upgrade (required) + - Example: "openshift-cert-manager-operator" + - Must match the operator's Subscription name +- **$2** (namespace): Namespace where operator is installed (optional) + - If not provided, searches all namespaces + - Example: "cert-manager-operator" +- **$3+** (flags): Optional flags + - `--channel=`: Switch to specified channel + - Example: `--channel=stable-v1`, `--channel=tech-preview` + - Triggers upgrade/downgrade to the version in that channel + - `--approve`: Automatically approve pending InstallPlan + - Only needed for operators with Manual approval mode + - Equivalent to `/olm:approve` command + +## Notes + +- **Automatic Updates**: Operators with `installPlanApproval: Automatic` will upgrade automatically when new versions are available in their channel +- **Manual Approval**: Operators with `installPlanApproval: Manual` require explicit approval via `--approve` flag or `/olm:approve` command +- **Channel Switching**: Changing channels may result in upgrade or downgrade depending on the versions in each channel +- **Rollback**: OLM has limited rollback support. 
Switching back to the previous channel may work, but data migration issues may occur +- **Upgrade Timing**: Upgrades happen according to the operator's upgrade strategy (some may cause downtime) + +## Troubleshooting + +- **No updates available**: + ```bash + # Check current version + oc get csv -n {namespace} + + # Check available versions + oc get packagemanifest {operator-name} -n openshift-marketplace -o json + ``` + +- **Upgrade stuck or pending**: + ```bash + # Check InstallPlan status + oc get installplan -n {namespace} + + # Check for events + oc get events -n {namespace} --sort-by='.lastTimestamp' | tail -20 + ``` + +- **Manual approval required**: + ```bash + # List pending InstallPlans + oc get installplan -n {namespace} -o json | jq '.items[] | select(.spec.approved==false)' + + # Approve specific InstallPlan + /olm:approve {operator-name} {namespace} + ``` + +- **Upgrade failed**: + ```bash + # Check CSV status + oc describe csv -n {namespace} + + # Check operator logs + oc logs -n {namespace} deployment/{operator-deployment} + + # Run diagnostics + /olm:diagnose {operator-name} {namespace} + ``` + +- **Rollback needed**: + - OLM doesn't have built-in rollback + - Can try switching back to previous channel, but may have issues: + ```bash + oc patch subscription {operator-name} -n {namespace} \ + --type merge --patch '{"spec":{"channel":"{old-channel}"}}' + ``` + - Consider backup/restore of custom resources before upgrading + +## Related Commands + +- `/olm:status ` - Check current version and available updates +- `/olm:approve ` - Approve pending InstallPlans +- `/olm:install ` - Install an operator +- `/olm:diagnose ` - Diagnose upgrade issues + +## Additional Resources + +- [Red Hat OpenShift: Updating Installed Operators](https://docs.redhat.com/en/documentation/openshift_container_platform/4.20/html/operators/administrator-tasks#olm-updating-operators) +- [Red Hat OpenShift: Approving Operator 
Upgrades](https://docs.redhat.com/en/documentation/openshift_container_platform/4.20/html/operators/administrator-tasks#olm-approving-operator-upgrades_olm-updating-operators) +- [Operator Lifecycle Manager Documentation](https://olm.operatorframework.io/) + +