Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions app-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -110,9 +110,20 @@ openchoreo:
baseUrl: ${OPENCHOREO_API_URL}
token: ${OPENCHOREO_TOKEN} # optional for now: for authentication
defaultOwner: 'platformengineer' # Default owner for catalog entities

# DEFAULT: Standard scheduled ingestion (recommended for most deployments)
schedule:
frequency: 30 # seconds between runs (default: 30)
timeout: 120 # seconds before a single run times out (default: 120)

# OPTIONAL: For large-scale deployments, use incremental ingestion instead
# Uncomment the section below and comment out the schedule section above
# Also update packages/backend/src/index.ts to use the incremental module
# incremental:
# burstLength: 16 # Duration of each burst of processing activity in seconds
# burstInterval: 8 # Interval between bursts of processing activity in seconds
# chunkSize: 512 # Number of items to fetch per API request
# restLength: 60 # Rest period after a full ingestion cycle completes, in minutes

thunder:
# Environment variables are injected by Helm chart (see https://github.com/openchoreo/openchoreo install/helm/openchoreo/templates/backstage/deployment.yaml)
Expand Down
1 change: 1 addition & 0 deletions packages/backend/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
"@openchoreo/backstage-plugin-catalog-backend-module-openchoreo-users": "workspace:^",
"@openchoreo/backstage-plugin-platform-engineer-core-backend": "workspace:^",
"@openchoreo/backstage-plugin-scaffolder-backend-module": "workspace:^",
"@openchoreo/plugin-catalog-backend-module-openchoreo-incremental": "workspace:^",
"app": "link:../app",
"better-sqlite3": "^9.0.0",
"express-session": "^1.18.2",
Expand Down
16 changes: 15 additions & 1 deletion packages/backend/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@ import { createBackend } from '@backstage/backend-defaults';
import { OpenChoreoDefaultAuthModule } from '@openchoreo/backstage-plugin-auth-backend-module-openchoreo-default';
import { rootHttpRouterServiceFactory } from '@backstage/backend-defaults/rootHttpRouter';

// OPTIONAL: For large-scale deployments, use the incremental ingestion module
// Uncomment the following lines and comment out the standard catalog-backend-module below
// import { catalogModuleOpenchoreoIncrementalProvider } from '@openchoreo/plugin-catalog-backend-module-openchoreo-incremental';

const backend = createBackend();

backend.add(rootHttpRouterServiceFactory());
Expand Down Expand Up @@ -58,7 +62,18 @@ backend.add(import('@backstage/plugin-search-backend-module-techdocs'));
backend.add(import('@backstage/plugin-user-settings-backend'));

backend.add(import('@openchoreo/backstage-plugin-backend'));

// DEFAULT: Standard catalog backend module (recommended for most deployments)
backend.add(import('@openchoreo/backstage-plugin-catalog-backend-module'));

// OPTIONAL: For large-scale deployments, use incremental ingestion instead
// Comment out the standard module above and uncomment the lines below:
// backend.add(
// import('@openchoreo/plugin-catalog-backend-module-openchoreo-incremental'),
// );
// backend.add(catalogModuleOpenchoreoIncrementalProvider);
// Note: Also update app-config.yaml to use openchoreo.incremental instead of openchoreo.schedule

backend.add(import('@openchoreo/backstage-plugin-scaffolder-backend-module'));
backend.add(
import(
Expand All @@ -68,5 +83,4 @@ backend.add(
backend.add(
import('@openchoreo/backstage-plugin-platform-engineer-core-backend'),
);
// backend.add(import('@openchoreo/backstage-plugin-home-backend'));
backend.start();
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
// Build this package's ESLint config from the factory shipped with the
// Backstage CLI, passing this package's directory to it.
const createEslintConfig = require('@backstage/cli/config/eslint-factory');

module.exports = createEslintConfig(__dirname);
175 changes: 175 additions & 0 deletions plugins/catalog-backend-module-openchoreo-incremental/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
# OpenChoreo Incremental Provider

The OpenChoreo Incremental Provider processes entities in small batches using cursor-based pagination with burst and rest cycles, providing optimal memory consumption, scalability, and controlled load for large OpenChoreo installations.

## Installation

Add the incremental provider module to your backend:

```typescript
// packages/backend/src/index.ts
backend.add(
import('@openchoreo/plugin-catalog-backend-module-openchoreo-incremental'),
);
```

## Configuration

```yaml
openchoreo:
baseUrl: ${OPENCHOREO_API_URL}
token: ${OPENCHOREO_TOKEN}
incremental:
burstLength: 10 # seconds - duration of each processing burst
burstInterval: 30 # seconds - interval between bursts during active ingestion
restLength: 30 # minutes - rest period after completing full ingestion
chunkSize: 50 # entities per API request
```

## How It Works

### Burst-Based Processing

The provider uses a burst-and-rest cycle to control load:

1. **Burst Phase**: Processes entities continuously for `burstLength` seconds
2. **Interstitial Phase**: Pauses for `burstInterval` seconds between bursts
3. **Rest Phase**: After completing a full ingestion cycle, rests for `restLength` minutes before starting again

This approach prevents overwhelming the API server while ensuring regular catalog updates.

### Cursor-Based Pagination

The provider traverses OpenChoreo resources in three phases using cursor-based pagination:

1. **Organizations Phase**: Fetches all organizations and builds an organization queue
2. **Projects Phase**: For each organization, fetches all projects and builds a project queue
3. **Components Phase**: For each project, fetches all components and their APIs

Each phase maintains its own API cursor (`orgApiCursor`, `projectApiCursor`, `componentApiCursor`) allowing safe resumption after interruptions. The cursor state tracks:

- Current phase (`orgs`, `projects`, `components`)
- API pagination cursors for each resource type
- Queues of organizations and projects to process
- Current position in each queue

### Requirements

Your OpenChoreo backend must support cursor-based pagination. The provider validates cursor support at startup and will throw an error if the API does not return the required `nextCursor` field in pagination responses.

### State Persistence

All ingestion state is persisted to the database:

- Cursors are saved after each burst
- Entity references are tracked for staleness detection
- Progress can resume from the last successful checkpoint
- Removed entities are detected by comparing current and previous ingestion snapshots

## Management API

The module provides REST API endpoints for monitoring and managing incremental ingestion:

- `GET /api/catalog/incremental/health` - Health check status for all providers
- `GET /api/catalog/incremental/providers` - List all registered incremental providers
- `GET /api/catalog/incremental/providers/{name}/status` - Get detailed status for a specific provider
- `POST /api/catalog/incremental/providers/{name}/reset` - Reset provider state to start fresh ingestion
- `POST /api/catalog/incremental/providers/{name}/refresh` - Trigger immediate refresh of provider data

## Database Migrations

The module includes automatic database migrations to create the necessary tables for state persistence:

- `openchoreo_incremental_ingestion_state` - Stores cursor state and ingestion metadata
- `openchoreo_incremental_entity_refs` - Tracks entity references for staleness detection

These migrations run automatically when the module is first loaded.

## Migration from Legacy Provider

If you were previously using the standard `@openchoreo/backstage-plugin-catalog-backend-module` provider:

1. **Remove the old provider**: Remove the basic OpenChoreo provider module from your backend
2. **Add this incremental module**: Register this module as shown in the Installation section
3. **Update configuration**: Add the `incremental` configuration block (or use defaults)
4. **Verify API support**: Ensure your OpenChoreo API supports cursor-based pagination endpoints

## Extension Points

The module provides extension points for advanced use cases:

### Incremental Provider Extension Point

You can extend the module with custom incremental entity providers:

```typescript
import {
openchoreoIncrementalProvidersExtensionPoint,
type OpenChoreoIncrementalProviderExtensionPoint,
} from '@openchoreo/plugin-catalog-backend-module-openchoreo-incremental';

// In your backend module
export default createBackendModule({
pluginId: 'catalog',
moduleId: 'custom-incremental-provider',
register(env) {
env.registerInit({
deps: {
providers: openchoreoIncrementalProvidersExtensionPoint,
},
async init({ providers }) {
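providers.addProvider({
provider: new CustomIncrementalProvider(),
options: {
burstInterval: { seconds: 30 },
burstLength: { seconds: 10 },
restLength: { seconds: 1800 },
},
});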
},
});
},
});
```

### Custom Provider Implementation

Implement the `IncrementalEntityProvider` interface for custom providers:

```typescript
import {
IncrementalEntityProvider,
EntityIteratorResult,
} from '@openchoreo/plugin-catalog-backend-module-openchoreo-incremental';

class CustomIncrementalProvider
implements IncrementalEntityProvider<MyCursor, MyContext>
{
getProviderName(): string {
return 'custom-provider';
}

async around(burst: (context: MyContext) => Promise<void>): Promise<void> {
// Setup and teardown logic
await burst(context);
}

async next(
context: MyContext,
cursor?: MyCursor,
): Promise<EntityIteratorResult<MyCursor>> {
// Return batch of entities and next cursor
}
}
```

## Features

- **Burst-Based Processing**: Controlled load with configurable burst and rest cycles
- **Three-Phase Traversal**: Systematic ingestion of organizations → projects → components
- **Cursor-Based Pagination**: Stable API cursors for efficient, resumable pagination
- **Memory Efficient**: Processes entities in small chunks without loading large datasets
- **Scalable**: Handles very large datasets efficiently with constant memory usage
- **Fault Tolerant**: Resumes from last successful checkpoint after interruptions
- **Configurable**: Customizable burst intervals, rest periods, chunk sizes, and retry backoff
- **Error Resilient**: Exponential backoff strategy with configurable retry intervals
- **Staleness Detection**: Automatically removes entities that no longer exist in OpenChoreo
- **Metrics & Observability**: OpenTelemetry metrics for monitoring ingestion progress
- **Event-Driven Updates**: Supports delta updates via Backstage events system
- **Management API**: REST endpoints for monitoring and controlling ingestion processes
- **Database Persistence**: Automatic migrations and state management
- **Extension Points**: Pluggable architecture for custom incremental providers
- **Health Monitoring**: Built-in health checks and provider status reporting
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
/*
* Copyright 2024 The Backstage Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/**
* Development setup for testing the OpenChoreo incremental ingestion plugin.
* Creates a backend with a dummy provider to simulate incremental entity processing.
*/

import { createBackend } from '@backstage/backend-defaults';
import {
coreServices,
createBackendModule,
} from '@backstage/backend-plugin-api';
import { mockServices } from '@backstage/backend-test-utils';
import {
IncrementalEntityProvider,
openchoreoIncrementalProvidersExtensionPoint,
type OpenChoreoIncrementalProviderExtensionPoint,
} from '../src';

/**
 * Catalog backend module used only by this dev setup.
 *
 * It registers a dummy incremental provider through the OpenChoreo incremental
 * providers extension point. The provider emits three empty batches (cursor
 * values 0, 1 and 2), then reports completion, waiting 500 ms before each
 * response.
 */
const dummyProvider = createBackendModule({
  pluginId: 'catalog',
  moduleId: 'openchoreo-test-provider',
  register(reg) {
    reg.registerInit({
      deps: {
        logger: coreServices.logger,
        providers: openchoreoIncrementalProvidersExtensionPoint,
      },
      async init({
        logger,
        providers,
      }: {
        // Derive the logger type from the service ref instead of using `any`.
        logger: typeof coreServices.logger.T;
        providers: OpenChoreoIncrementalProviderExtensionPoint;
      }) {
        const provider: IncrementalEntityProvider<number, {}> = {
          getProviderName: () => 'test-provider',
          around: burst => burst(0),
          next: async (_context, cursor) => {
            // Wait half a second before answering each request.
            await new Promise(resolve => setTimeout(resolve, 500));

            // The first call arrives without a cursor; treat it as batch 0 so
            // the log line never prints "#undefined".
            const batch = cursor ?? 0;
            if (batch < 3) {
              logger.info(`### Returning batch #${batch}`);
              return { done: false, entities: [], cursor: batch + 1 };
            }

            logger.info('### Last batch reached, stopping');
            return { done: true };
          },
        };

        // Short burst/rest durations keep the full cycle observable in dev.
        providers.addProvider({
          provider,
          options: {
            burstInterval: { seconds: 1 },
            burstLength: { seconds: 10 },
            restLength: { seconds: 10 },
          },
        });
      },
    });
  },
});

// Root config for the dev backend: listens on port 7007 and uses an
// in-memory better-sqlite3 database.
const devRootConfig = {
  backend: {
    baseUrl: 'http://localhost:7007',
    listen: ':7007',
    database: { client: 'better-sqlite3', connection: ':memory:' },
  },
};

const backend = createBackend();

// Registration order: mock root config, the catalog backend, the module under
// development, then the dummy provider.
backend.add(mockServices.rootConfig.factory({ data: devRootConfig }));
backend.add(import('@backstage/plugin-catalog-backend'));
backend.add(import('../src'));
backend.add(dummyProvider);

backend.start();
Loading
Loading