diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 07ac2ce6..61732a7f 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -7,6 +7,22 @@ on: branches: [main] jobs: + # Dependency review for PRs - checks for vulnerabilities in dependency changes + dependency-review: + runs-on: ubuntu-latest + if: github.event_name == 'pull_request' + + steps: + - name: Checkout + uses: actions/checkout@v6 + + - name: Dependency Review + uses: actions/dependency-review-action@v4 + with: + fail-on-severity: high + # Allow existing vulnerabilities, only fail on new ones + deny-licenses: '' + build: runs-on: windows-latest @@ -23,6 +39,21 @@ jobs: - name: Restore dependencies run: dotnet restore + - name: Check for vulnerable packages + shell: pwsh + run: | + $output = dotnet list package --vulnerable --include-transitive 2>&1 + $output | Write-Host + + if ($output -match "has the following vulnerable packages") { + Write-Host "" + Write-Host "::warning::Vulnerable packages detected - review security advisories above" + # Note: Not failing the build to avoid blocking on transitive dependencies + # that require upstream fixes. Dependency-review-action will catch new vulns. + } else { + Write-Host "No known vulnerabilities found in packages" + } + - name: Build run: dotnet build --configuration Release --no-restore diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml new file mode 100644 index 00000000..48d545af --- /dev/null +++ b/.github/workflows/codeql.yml @@ -0,0 +1,75 @@ +# ============================================================================= +# Security: CodeQL Analysis +# ============================================================================= +# Performs semantic code analysis to find security vulnerabilities and +# code quality issues in C# code. 
+# +# Triggers: +# - Push to main branch +# - Pull requests to main +# - Weekly schedule (Monday 9 AM UTC) +# +# For more info: https://docs.github.com/en/code-security/code-scanning +# ============================================================================= + +name: 'Security: CodeQL' + +on: + push: + branches: + - main + paths: + - 'src/**/*.cs' + - '**/*.csproj' + - '*.sln' + + pull_request: + branches: + - main + paths: + - 'src/**/*.cs' + - '**/*.csproj' + - '*.sln' + + schedule: + # Weekly scan on Monday at 9 AM UTC + - cron: '0 9 * * 1' + + workflow_dispatch: + +jobs: + analyze: + name: Analyze C# + runs-on: ubuntu-latest + + permissions: + security-events: write + packages: read + actions: read + contents: read + + steps: + - name: Checkout repository + uses: actions/checkout@v6 + + - name: Initialize CodeQL + uses: github/codeql-action/init@v4 + with: + languages: csharp + # Use security-and-quality for comprehensive analysis + queries: security-and-quality + + - name: Setup .NET SDK + uses: actions/setup-dotnet@v5 + with: + dotnet-version: '8.x' + + - name: Build solution + run: | + dotnet restore + dotnet build --no-restore --configuration Release + + - name: Perform CodeQL Analysis + uses: github/codeql-action/analyze@v4 + with: + category: "/language:csharp" diff --git a/.github/workflows/publish-nuget.yml b/.github/workflows/publish-nuget.yml index 1a63fe9f..2dbfe096 100644 --- a/.github/workflows/publish-nuget.yml +++ b/.github/workflows/publish-nuget.yml @@ -9,10 +9,10 @@ jobs: runs-on: windows-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 - name: Setup .NET - uses: actions/setup-dotnet@v4 + uses: actions/setup-dotnet@v5 with: dotnet-version: | 8.0.x diff --git a/CHANGELOG.md b/CHANGELOG.md index e1f7adca..53dcc8cc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -32,10 +32,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Connection selection strategies: RoundRobin, 
LeastConnections, ThrottleAware - Throttle tracking with automatic routing away from throttled connections - Bulk operation wrappers: CreateMultiple, UpdateMultiple, UpsertMultiple, DeleteMultiple + - `IProgress` support for real-time progress reporting during bulk operations - DI integration via `AddDataverseConnectionPool()` extension method - Affinity cookie disabled by default for improved throughput - Targets: `net8.0`, `net10.0` +### Documentation + +- Added UpsertMultiple pitfalls section to `BULK_OPERATIONS_PATTERNS.md` - documents the duplicate key error when setting alternate key columns in both `KeyAttributes` and `Attributes` + ### Changed - Updated publish workflow to support multiple packages and extract version from git tag diff --git a/CLAUDE.md b/CLAUDE.md index 67409517..1964d4f1 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -1,6 +1,6 @@ # CLAUDE.md - ppds-sdk -**NuGet packages for Power Platform plugin development.** +**NuGet packages for Power Platform development: plugin attributes, Dataverse connectivity, and migration tooling.** **Part of the PPDS Ecosystem** - See `C:\VS\ppds\CLAUDE.md` for cross-project context. 
@@ -15,6 +15,10 @@ | Skip XML documentation on public APIs | Consumers need IntelliSense documentation | | Multi-target without testing all frameworks | Dataverse has specific .NET requirements | | Commit with failing tests | All tests must pass before merge | +| Create new ServiceClient per request | 42,000x slower than Clone/pool pattern; wastes ~446ms per instance | +| Guess parallelism values | Use `RecommendedDegreesOfParallelism` from server; guessing degrades performance | +| Enable affinity cookie for bulk operations | Routes all requests to single backend node; 10x throughput loss | +| Store pooled clients in fields | Causes connection leaks; get per operation, dispose immediately | --- @@ -28,6 +32,10 @@ | Run `dotnet test` before PR | Ensures no regressions | | Update `CHANGELOG.md` with changes | Release notes for consumers | | Follow SemVer versioning | Clear compatibility expectations | +| Use connection pool for multi-request scenarios | Reuses connections, applies performance settings automatically | +| Dispose pooled clients with `await using` | Returns connections to pool; prevents leaks | +| Use bulk APIs (`CreateMultiple`, `UpdateMultiple`, `UpsertMultiple`) | 5x faster than `ExecuteMultiple` (~10M vs ~2M records/hour) | +| Reference Microsoft Learn docs in ADRs | Authoritative source for Dataverse best practices | --- @@ -47,13 +55,25 @@ ``` ppds-sdk/ ├── src/ -│ └── PPDS.Plugins/ -│ ├── Attributes/ # PluginStepAttribute, PluginImageAttribute -│ ├── Enums/ # PluginStage, PluginMode, PluginImageType -│ ├── PPDS.Plugins.csproj -│ └── PPDS.Plugins.snk # Strong name key (DO NOT regenerate) +│ ├── PPDS.Plugins/ +│ │ ├── Attributes/ # PluginStepAttribute, PluginImageAttribute +│ │ ├── Enums/ # PluginStage, PluginMode, PluginImageType +│ │ ├── PPDS.Plugins.csproj +│ │ └── PPDS.Plugins.snk # Strong name key (DO NOT regenerate) +│ ├── PPDS.Dataverse/ +│ │ ├── BulkOperations/ # CreateMultiple, UpdateMultiple, UpsertMultiple +│ │ ├── Client/ # 
DataverseClient, IDataverseClient +│ │ ├── Pooling/ # Connection pool, strategies +│ │ ├── Resilience/ # Throttle tracking, retry logic +│ │ └── PPDS.Dataverse.csproj +│ ├── PPDS.Migration/ # Migration engine library +│ └── PPDS.Migration.Cli/ # CLI tool (ppds-migrate) ├── tests/ -│ └── PPDS.Plugins.Tests/ +│ ├── PPDS.Plugins.Tests/ +│ └── PPDS.Dataverse.Tests/ +├── docs/ +│ ├── adr/ # Architecture Decision Records +│ └── architecture/ # Pattern documentation ├── .github/workflows/ │ ├── build.yml # CI build │ ├── test.yml # CI tests @@ -200,11 +220,64 @@ namespace PPDS.Plugins.Enums; // Enums |------|---------| | `PPDS.Plugins.csproj` | Project config, version, NuGet metadata | | `PPDS.Plugins.snk` | Strong name key (DO NOT regenerate) | +| `PPDS.Dataverse.csproj` | Dataverse client library | | `CHANGELOG.md` | Release notes | | `.editorconfig` | Code style settings | --- +## ⚡ Dataverse Performance (PPDS.Dataverse) + +### Microsoft's Required Settings for Maximum Throughput + +The connection pool automatically applies these settings. 
If bypassing the pool, you MUST apply them manually: + +```csharp +ThreadPool.SetMinThreads(100, 100); // Default is 4 +ServicePointManager.DefaultConnectionLimit = 65000; // Default is 2 +ServicePointManager.Expect100Continue = false; +ServicePointManager.UseNagleAlgorithm = false; +``` + +### Service Protection Limits (Per User, Per 5-Minute Window) + +| Limit | Value | +|-------|-------| +| Requests | 6,000 | +| Execution time | 20 minutes | +| Concurrent requests | 52 (check `x-ms-dop-hint` header) | + +### Throughput Benchmarks (Microsoft Reference) + +| Approach | Throughput | +|----------|------------| +| Single requests | ~50K records/hour | +| ExecuteMultiple | ~2M records/hour | +| CreateMultiple/UpdateMultiple | ~10M records/hour | +| Elastic tables | ~120M writes/hour | + +### Key Documentation + +- [Optimize performance for bulk operations](https://learn.microsoft.com/en-us/power-apps/developer/data-platform/optimize-performance-create-update) +- [Send parallel requests](https://learn.microsoft.com/en-us/power-apps/developer/data-platform/send-parallel-requests) +- [Service protection API limits](https://learn.microsoft.com/en-us/power-apps/developer/data-platform/api-limits) +- [Use bulk operation messages](https://learn.microsoft.com/en-us/power-apps/developer/data-platform/bulk-operations) + +### Throttle Recovery (Known Limitation) + +The pool handles service protection errors transparently (waits and retries). However, it currently resumes at **full parallelism** after recovery, which can cause re-throttling with extended `Retry-After` durations. + +**Microsoft recommends** gradual ramp-up after throttle recovery. This is planned for a future enhancement (see ADR-0004). 
+ +**Workaround**: Use lower `MaxParallelBatches` to reduce throttle frequency: + +```csharp +var options = new BulkOperationOptions { MaxParallelBatches = 10 }; +await executor.UpsertMultipleAsync(entities, options); +``` + +--- + ## 🧪 Testing Requirements - **Target 80% code coverage** diff --git a/docs/BULK_OPERATIONS_BENCHMARKS.md b/docs/BULK_OPERATIONS_BENCHMARKS.md new file mode 100644 index 00000000..4ef97583 --- /dev/null +++ b/docs/BULK_OPERATIONS_BENCHMARKS.md @@ -0,0 +1,168 @@ +# Bulk Operations Benchmarks + +Performance testing for bulk operations against Dataverse. + +## Test Environment + +- **Entity:** `ppds_zipcode` (simple entity with alternate key) +- **Record count:** 42,366 +- **Environment:** Developer environment (single tenant) +- **App registrations:** Single (one set of API limits) +- **Parallelism tested:** Server-recommended (5) and elevated (50) + +## Microsoft's Reference Benchmarks + +From [Microsoft Learn - Optimize performance for bulk operations](https://learn.microsoft.com/en-us/power-apps/developer/data-platform/optimize-performance-create-update): + +| Approach | Throughput | Notes | +|----------|------------|-------| +| Single requests | ~50K records/hour | Baseline | +| ExecuteMultiple | ~2M records/hour | 40x improvement | +| CreateMultiple/UpdateMultiple | ~10M records/hour | 5x over ExecuteMultiple | +| Elastic tables (Cosmos DB) | ~120M writes/hour | Azure Cosmos DB backend | + +> "Bulk operation APIs like CreateMultiple, UpdateMultiple, and UpsertMultiple can provide throughput improvement of up to 5x, growing from 2 million records created per hour using ExecuteMultiple to the creation of 10 million records in less than an hour." 
+ +## Microsoft's Batch Size Recommendation + +From [Microsoft Learn - Use bulk operation messages](https://learn.microsoft.com/en-us/power-apps/developer/data-platform/bulk-operations): + +> "Generally, we expect that **100 - 1,000 records per request** is a reasonable place to start if the size of the record data is small and there are no plug-ins." + +For elastic tables specifically: +> "The recommended number of record operations to send with CreateMultiple and UpdateMultiple for elastic tables is **100**." + +## Results: Creates (UpsertMultiple) + +### Standard Mode (Server-Recommended Parallelism) + +| Approach | Batch Size | Parallelism | Time (s) | Throughput (rec/s) | Notes | +|----------|------------|-------------|----------|-------------------|-------| +| Single ServiceClient | 100 | 4 | 933 | 45.4 | Baseline | +| Connection Pool | 100 | 4 | 888 | 47.7 | 5% faster than baseline | +| Connection Pool | 1000 | 4 | 919 | 46.1 | 3% slower than batch 100 | +| Connection Pool | 100 | 5 (server) | 704 | **60.2** | +26% using server-recommended parallelism | + +### High-Throughput Mode (Elevated Parallelism) + +For bulk data loading scenarios where throughput is critical, parallelism can be increased beyond the server-recommended value: + +| Approach | Batch Size | Parallelism | Time (s) | Throughput (rec/s) | Notes | +|----------|------------|-------------|----------|-------------------|-------| +| Connection Pool | 100 | 50 | 83 | **508.6** | 8.4x faster than server-recommended | + +**Key result:** 42,366 records loaded in 83 seconds with zero failures. 
+ +### When to Use Each Mode + +| Mode | Parallelism | Use Case | +|------|-------------|----------| +| **Standard** | Server-recommended (typically 5) | Interactive operations, mixed workloads, shared environments | +| **High-Throughput** | 50+ | Bulk data migrations, initial data loads, batch processing jobs | + +**Considerations for high-throughput mode:** + +- Requires sufficient pool connections (`MaxPoolSize` ≥ parallelism) +- Consumes more API quota - avoid during business hours on shared environments +- Single app registration was used; multiple app registrations could potentially increase throughput further (untested) +- Monitor for throttling in production; the SDK handles 429 responses automatically + +### Key Findings + +1. **Server-recommended parallelism is a safe default** (+26% vs hardcoded) + - `RecommendedDegreesOfParallelism` returns server-tuned value + - Automatically adapts to environment capacity + - No guesswork required + +2. **Elevated parallelism unlocks massive gains for bulk operations** (+744% over server-recommended) + - 508.6 rec/s vs 60.2 rec/s + - ~1.83M records/hour vs ~217K records/hour + - Appropriate for dedicated data loading scenarios + +3. **Connection Pool is faster than Single ServiceClient** (+5%) + - True parallelism with independent connections + - No internal locking/serialization overhead + - Affinity cookie disabled improves server-side distribution + +4. 
**Batch size 100 is optimal** (+3% vs batch 1000) + - Aligns with Microsoft's recommendation + - More granular parallelism + - Less memory pressure per request + +### Recommended Configurations + +**Standard (default):** Connection Pool + Batch Size 100 + Server Parallelism = **60.2 records/sec** (~217K/hour) + +**High-Throughput:** Connection Pool + Batch Size 100 + Parallelism 50 = **508.6 records/sec** (~1.83M/hour) + +## Results: Updates (UpsertMultiple) + +| Approach | Batch Size | Time (s) | Throughput (rec/s) | Notes | +|----------|------------|----------|-------------------|-------| +| Connection Pool | 100 | 1153 | 36.7 | Alternate key lookup overhead | + +### Observations + +- Updates are ~23% slower than creates (36.7/s vs 47.7/s) +- Expected due to server-side alternate key lookup before modification +- Connection approach doesn't affect this - it's server-side overhead + +## Configuration + +```json +{ + "Dataverse": { + "Pool": { + "Enabled": true, + "MaxPoolSize": 50, + "MinPoolSize": 5, + "DisableAffinityCookie": true + } + } +} +``` + +```csharp +var options = new BulkOperationOptions +{ + BatchSize = 100 + // MaxParallelBatches omitted - uses RecommendedDegreesOfParallelism from server +}; +``` + +## Analysis: Our Results vs Microsoft Benchmarks + +Microsoft's reference benchmark shows ~10M records/hour for CreateMultiple/UpdateMultiple. Our high-throughput mode achieved **~1.83M records/hour** in a developer environment. + +The gap is expected due to: + +1. **Developer environment** - Single-tenant dev environments have lower resource allocation than production +2. **Single app registration** - One client credential = one set of API limits +3. **Entity complexity** - Alternate key lookups add overhead +4. **Service protection limits** - Dev environments have stricter throttling + +In production environments with multiple app registrations (each with independent API quotas), throughput could approach Microsoft's benchmarks. 
+ +### Progression Summary + +| Change | Improvement | Throughput | +|--------|-------------|------------| +| Single client (baseline) | — | 45.4 rec/s | +| → Connection pool | +5% | 47.7 rec/s | +| → Batch 100 (vs 1000) | +3% | — | +| → Server-recommended parallelism | +26% | 60.2 rec/s | +| → Elevated parallelism (50) | +744% | **508.6 rec/s** | +| **Total improvement** | **+1,020%** | 45.4 → 508.6 rec/s | + +### Key Insights + +1. **Server-recommended parallelism is a good starting point** - Provides +26% improvement with automatic tuning +2. **Elevated parallelism is the largest lever** - +744% improvement for bulk operations +3. **Multi-app-registration pooling** - Untested but theoretically could multiply throughput further by distributing load across independent API quotas + +## References + +- [Optimize performance for bulk operations](https://learn.microsoft.com/en-us/power-apps/developer/data-platform/optimize-performance-create-update) +- [Use bulk operation messages](https://learn.microsoft.com/en-us/power-apps/developer/data-platform/bulk-operations) +- [Send parallel requests](https://learn.microsoft.com/en-us/power-apps/developer/data-platform/send-parallel-requests) +- [Service protection API limits](https://learn.microsoft.com/en-us/power-apps/developer/data-platform/api-limits) diff --git a/docs/adr/0001_DISABLE_AFFINITY_COOKIE.md b/docs/adr/0001_DISABLE_AFFINITY_COOKIE.md index 524e8107..27db5e1f 100644 --- a/docs/adr/0001_DISABLE_AFFINITY_COOKIE.md +++ b/docs/adr/0001_DISABLE_AFFINITY_COOKIE.md @@ -49,3 +49,5 @@ Set `DisableAffinityCookie = false` for: - [ServiceClient Discussion #312](https://github.com/microsoft/PowerPlatform-DataverseServiceClient/discussions/312) - Microsoft confirms order-of-magnitude improvement - [Service protection API limits](https://learn.microsoft.com/en-us/power-apps/developer/data-platform/api-limits) +- [Send parallel 
requests](https://learn.microsoft.com/en-us/power-apps/developer/data-platform/send-parallel-requests) - Microsoft's guidance on disabling affinity cookie +- [Optimize performance for bulk operations](https://learn.microsoft.com/en-us/power-apps/developer/data-platform/optimize-performance-create-update) - Performance optimization patterns diff --git a/docs/adr/0002_MULTI_CONNECTION_POOLING.md b/docs/adr/0002_MULTI_CONNECTION_POOLING.md index acaef191..5030d0fe 100644 --- a/docs/adr/0002_MULTI_CONNECTION_POOLING.md +++ b/docs/adr/0002_MULTI_CONNECTION_POOLING.md @@ -61,5 +61,7 @@ services.AddDataverseConnectionPool(options => ## References -- [Service protection API limits](https://learn.microsoft.com/en-us/power-apps/developer/data-platform/api-limits) -- [Application User setup](https://learn.microsoft.com/en-us/power-platform/admin/manage-application-users) +- [Service protection API limits](https://learn.microsoft.com/en-us/power-apps/developer/data-platform/api-limits) - Throttling thresholds per user +- [Application User setup](https://learn.microsoft.com/en-us/power-platform/admin/manage-application-users) - Provisioning app registrations +- [Optimize performance for bulk operations](https://learn.microsoft.com/en-us/power-apps/developer/data-platform/optimize-performance-create-update) - User multiplexing guidance +- [Scaling Dynamics 365 CRM Integrations in Azure](https://techcommunity.microsoft.com/blog/microsoftmissioncriticalblog/scaling-dynamics-365-crm-integrations-in-azure-the-right-way-to-use-the-sdk-s/4447143) - Multi-user distribution patterns diff --git a/docs/adr/0003_THROTTLE_AWARE_SELECTION.md b/docs/adr/0003_THROTTLE_AWARE_SELECTION.md index 4a6093af..5b767a13 100644 --- a/docs/adr/0003_THROTTLE_AWARE_SELECTION.md +++ b/docs/adr/0003_THROTTLE_AWARE_SELECTION.md @@ -58,5 +58,7 @@ Request N → AppUser1 (cooldown expired, available again) ✓ ## References -- [Retry-After 
header](https://learn.microsoft.com/en-us/power-apps/developer/data-platform/api-limits#retry-operations) -- [Service protection limits](https://learn.microsoft.com/en-us/power-apps/developer/data-platform/api-limits) +- [Retry-After header](https://learn.microsoft.com/en-us/power-apps/developer/data-platform/api-limits#retry-operations) - How to handle throttle responses +- [Service protection limits](https://learn.microsoft.com/en-us/power-apps/developer/data-platform/api-limits) - Throttling thresholds and error codes +- [Optimize performance for bulk operations](https://learn.microsoft.com/en-us/power-apps/developer/data-platform/optimize-performance-create-update) - Throttle handling best practices +- [Maximize API throughput](https://learn.microsoft.com/en-us/dynamics365/fin-ops-core/dev-itpro/data-entities/service-protection-maximizing-api-throughput) - Strategies for high-throughput scenarios diff --git a/docs/adr/0004_THROTTLE_RECOVERY_STRATEGY.md b/docs/adr/0004_THROTTLE_RECOVERY_STRATEGY.md new file mode 100644 index 00000000..d765e909 --- /dev/null +++ b/docs/adr/0004_THROTTLE_RECOVERY_STRATEGY.md @@ -0,0 +1,112 @@ +# ADR-0004: Throttle Recovery Strategy + +**Status:** Accepted (with known limitation) +**Applies to:** PPDS.Dataverse + +## Context + +When all connections are throttled, the pool must wait for the `Retry-After` period before resuming operations. Microsoft recommends a gradual ramp-up strategy after throttle recovery to minimize extended penalties: + +> "If the application continues to send such demanding requests, the duration is extended to minimize the impact on shared resources. This causes the individual retry-after duration period to be longer." +> +> "When possible, we recommend trying to achieve a consistent rate by starting with a lower number of requests and gradually increasing until you start hitting the service protection API limits." 
+ +## Decision + +### Current Implementation (v1) + +The pool implements **transparent throttle waiting** with immediate full-parallelism recovery: + +1. **Throttle detection**: PooledClient automatically records throttle via callback +2. **Wait phase**: `GetClientAsync` waits for throttle to clear **without holding semaphore slots** +3. **Recovery**: Resume at full configured parallelism immediately + +``` +Throttle detected → Wait for Retry-After → Resume at 100% parallelism +``` + +### Key Design: Semaphore Not Held During Wait + +The pool separates "waiting for throttle" from "holding a connection slot": + +```csharp +// Phase 1: Wait for non-throttled connection (NO semaphore held) +await WaitForNonThrottledConnectionAsync(cancellationToken); + +// Phase 2: Acquire semaphore (only when ready to use connection) +await _connectionSemaphore.WaitAsync(timeout, cancellationToken); + +// Phase 3: Get and use connection +return GetConnectionFromPoolCore(connectionName, options); +``` + +This prevents `PoolExhaustedException` when many requests are waiting for throttle recovery. + +## Known Limitation + +**The current implementation does not implement adaptive scaling after throttle recovery.** + +Resuming at full parallelism immediately after `Retry-After` can cause: +- Immediate re-throttling +- Progressively longer `Retry-After` durations +- Suboptimal total throughput + +### Optimal Behavior (Future Enhancement) + +Microsoft recommends TCP-like congestion control: + +``` +After throttle recovery: +1. Resume at reduced parallelism (e.g., 50%) +2. Gradually ramp up if successful +3. Back off immediately if throttled again +4. Find and maintain sustainable rate +``` + +## Planned Enhancement + +Adaptive rate control using AIMD (Additive Increase, Multiplicative Decrease) algorithm is designed and ready for implementation. 
+ +**See:** [ADAPTIVE_RATE_CONTROL_SPEC.md](../architecture/ADAPTIVE_RATE_CONTROL_SPEC.md) + +Key features: +- Start at 50% of `RecommendedDegreesOfParallelism` +- Increase gradually after sustained success (batch count + time interval) +- Halve parallelism on throttle +- Fast recovery to last-known-good, then cautious probing +- 5-minute TTL on historical state (matches Microsoft's rolling window) +- Idle reset for long-running integrations + +## Consequences + +### Positive + +- **No blocking**: Requests don't hold semaphore slots while waiting +- **Transparent**: Consumer doesn't need to handle service protection errors +- **Simple**: Easy to understand and debug + +### Negative + +- **Suboptimal recovery**: Full parallelism after recovery may cause re-throttling +- **Extended penalties**: Aggressive resumption can extend `Retry-After` durations +- **Consumer workaround needed**: For optimal throughput, consumers should manage parallelism externally + +### Consumer Workaround + +Until adaptive scaling is implemented, consumers can manage parallelism manually: + +```csharp +// Start conservative, let the pool handle throttle waiting +var options = new BulkOperationOptions +{ + MaxParallelBatches = 10 // Lower than RecommendedDegreesOfParallelism +}; + +await executor.UpsertMultipleAsync(entities, options); +``` + +## References + +- [Service protection API limits](https://learn.microsoft.com/en-us/power-apps/developer/data-platform/api-limits) - Retry-After behavior +- [Maximize API throughput](https://learn.microsoft.com/en-us/dynamics365/fin-ops-core/dev-itpro/data-entities/service-protection-maximizing-api-throughput) - Microsoft's ramp-up recommendation +- ADR-0003: Throttle-Aware Connection Selection - Related throttle handling decision diff --git a/docs/adr/0005_POOL_SIZING_PER_CONNECTION.md b/docs/adr/0005_POOL_SIZING_PER_CONNECTION.md new file mode 100644 index 00000000..6fd06851 --- /dev/null +++ b/docs/adr/0005_POOL_SIZING_PER_CONNECTION.md @@ -0,0 
+1,102 @@ +# ADR-0005: Pool Sizing Per Connection + +**Status:** Approved for Implementation +**Applies to:** PPDS.Dataverse +**Date:** 2025-12-23 + +## Context + +Microsoft's service protection limits are **per Application User** (per connection), not per environment: + +- Each Application User can handle 52 concurrent requests (`RecommendedDegreesOfParallelism`) +- Multiple Application Users have **independent quotas** + +Current configuration uses a shared pool size: + +```csharp +public class PoolOptions +{ + public int MaxPoolSize { get; set; } = 50; // Shared across all connections +} +``` + +With 2 connections configured, this results in ~25 connections per user, leaving ~50% of available capacity unused. + +## Decision + +Change the default from **shared pool size** to **per-connection pool size**: + +```csharp +public class PoolOptions +{ + /// + /// Maximum concurrent connections per Application User (connection configuration). + /// Default: 52 (matches Microsoft's RecommendedDegreesOfParallelism). + /// Total pool capacity = this × number of configured connections. + /// + public int MaxConnectionsPerUser { get; set; } = 52; + + /// + /// Legacy: Maximum total pool size across all connections. + /// If set to non-zero, overrides MaxConnectionsPerUser calculation. + /// Default: 0 (use per-connection sizing). 
+ /// + [Obsolete("Use MaxConnectionsPerUser for optimal throughput")] + public int MaxPoolSize { get; set; } = 0; +} +``` + +### Behavior + +| Scenario | Calculation | Result | +|----------|-------------|--------| +| 1 connection, default | 1 × 52 | 52 total capacity | +| 2 connections, default | 2 × 52 | 104 total capacity | +| 4 connections, default | 4 × 52 | 208 total capacity | +| Legacy MaxPoolSize = 50 | 50 (ignores per-connection) | 50 total capacity | + +### Implementation + +```csharp +private int CalculateTotalPoolCapacity() +{ + // Legacy override takes precedence + #pragma warning disable CS0618 + if (_options.Pool.MaxPoolSize > 0) + { + return _options.Pool.MaxPoolSize; + } + #pragma warning restore CS0618 + + // Per-connection sizing (recommended) + return _options.Connections.Count * _options.Pool.MaxConnectionsPerUser; +} + +// Semaphore initialization +var totalCapacity = CalculateTotalPoolCapacity(); +_connectionSemaphore = new SemaphoreSlim(totalCapacity); +``` + +## Consequences + +### Positive + +- **Optimal by default** - Utilizes full available quota without manual tuning +- **Scales naturally** - Add connections, get proportional capacity +- **Aligns with Microsoft** - Per-user limits match per-user pool sizing +- **Simple mental model** - "Each user can do 52 concurrent" + +### Negative + +- **Higher resource usage** - More connections = more memory +- **Breaking change for some** - Users expecting shared sizing may be surprised +- **Need migration path** - Document the change, keep legacy option + +### Migration + +Users who explicitly set `MaxPoolSize` keep their behavior. Users on defaults get improved throughput automatically. 
+ +## References + +- [Service Protection API Limits](https://learn.microsoft.com/en-us/power-apps/developer/data-platform/api-limits) - Per-user limits +- [Send Parallel Requests](https://learn.microsoft.com/en-us/power-apps/developer/data-platform/send-parallel-requests) - RecommendedDegreesOfParallelism diff --git a/docs/architecture/ADAPTIVE_RATE_CONTROL_SPEC.md b/docs/architecture/ADAPTIVE_RATE_CONTROL_SPEC.md new file mode 100644 index 00000000..3e953375 --- /dev/null +++ b/docs/architecture/ADAPTIVE_RATE_CONTROL_SPEC.md @@ -0,0 +1,425 @@ +# Adaptive Rate Control - Design Specification + +**Status:** Approved for Implementation +**Target:** Next release branch +**Author:** Claude Code +**Date:** 2025-12-23 + +--- + +## Problem Statement + +After throttle recovery, the pool resumes at full parallelism, causing: +- Immediate re-throttling +- Progressively longer `Retry-After` durations (server extends penalties for aggressive clients) +- Suboptimal total throughput + +Microsoft recommends: *"Start with a lower number of requests and gradually increase until you start hitting the service protection API limits. After that, let the server tell you how many requests it can handle."* + +**Reference:** [Maximize API Throughput](https://learn.microsoft.com/en-us/dynamics365/fin-ops-core/dev-itpro/data-entities/service-protection-maximizing-api-throughput) + +--- + +## Solution: AIMD-based Adaptive Rate Control + +Implement **Additive Increase, Multiplicative Decrease** (AIMD) - the algorithm that powers TCP congestion control, adapted for Dataverse API rate limiting. + +### Core Principles + +1. **Start conservative** - Begin at 50% of recommended parallelism +2. **Increase gradually** - Add parallelism only after sustained success +3. **Decrease aggressively** - Halve parallelism on throttle +4. **Fast recovery** - Return to last-known-good quickly, then probe cautiously +5. 
**Time-aware** - Respect Microsoft's 5-minute rolling window + +--- + +## Algorithm + +### State (Per Connection) + +``` +currentParallelism: int # Current allowed concurrent requests +maxParallelism: int # Ceiling from RecommendedDegreesOfParallelism +lastKnownGoodParallelism: int # Level before last throttle +lastKnownGoodTimestamp: DateTime # When lastKnownGood was recorded +successesSinceThrottle: int # Counter for stabilization +lastIncreaseTimestamp: DateTime # When we last increased (time-gating) +lastActivityTimestamp: DateTime # When we last had any activity (idle detection) +totalThrottleEvents: int # Statistics +``` + +### Initialization + +``` +On first request for connection: + maxParallelism = ServiceClient.RecommendedDegreesOfParallelism + currentParallelism = floor(maxParallelism × InitialParallelismFactor) + lastKnownGoodParallelism = currentParallelism + lastKnownGoodTimestamp = now + successesSinceThrottle = 0 + lastIncreaseTimestamp = now + lastActivityTimestamp = now +``` + +### On Batch Success + +``` +lastActivityTimestamp = now +successesSinceThrottle++ + +# Check if lastKnownGood is stale (older than TTL) +if (now - lastKnownGoodTimestamp > LastKnownGoodTTL): + lastKnownGoodParallelism = currentParallelism # Treat current as baseline + +# Check if we can increase (batch count AND time elapsed) +canIncrease = successesSinceThrottle >= StabilizationBatches + AND (now - lastIncreaseTimestamp) >= MinIncreaseInterval + +if canIncrease: + if currentParallelism < lastKnownGoodParallelism: + # Fast recovery phase - get back to known-good quickly + increase = IncreaseRate × RecoveryMultiplier + else: + # Probing phase - cautiously explore above known-good + increase = IncreaseRate + + currentParallelism = min(currentParallelism + increase, maxParallelism) + successesSinceThrottle = 0 + lastIncreaseTimestamp = now +``` + +### On Throttle + +``` +lastActivityTimestamp = now +totalThrottleEvents++ + +# Remember current level as "almost good" (we were 
one step too high)
+lastKnownGoodParallelism = max(currentParallelism - IncreaseRate, MinParallelism)
+lastKnownGoodTimestamp = now
+
+# Multiplicative decrease
+currentParallelism = max(floor(currentParallelism × DecreaseFactor), MinParallelism)
+successesSinceThrottle = 0
+```
+
+### On Get Parallelism (Before Each Chunk)
+
+```
+# Check for idle reset FIRST, against the previous activity timestamp
+if (now - lastActivityTimestamp) > IdleResetPeriod:
+    Reset()  # Start fresh
+
+lastActivityTimestamp = now
+
+return currentParallelism
+```
+
+### Reset
+
+```
+currentParallelism = floor(maxParallelism × InitialParallelismFactor)
+lastKnownGoodParallelism = currentParallelism
+lastKnownGoodTimestamp = now
+successesSinceThrottle = 0
+lastIncreaseTimestamp = now
+# Note: totalThrottleEvents is NOT reset (cumulative stat)
+```
+
+---
+
+## Configuration
+
+```csharp
+public class AdaptiveRateOptions
+{
+    /// <summary>
+    /// Enable/disable adaptive rate control. Default: true.
+    /// When disabled, uses fixed parallelism from RecommendedDegreesOfParallelism.
+    /// </summary>
+    public bool Enabled { get; set; } = true;
+
+    /// <summary>
+    /// Initial parallelism as factor of max (0.1-1.0). Default: 0.5.
+    /// Starts at 50% of RecommendedDegreesOfParallelism.
+    /// </summary>
+    public double InitialParallelismFactor { get; set; } = 0.5;
+
+    /// <summary>
+    /// Minimum parallelism floor. Default: 1.
+    /// Never goes below this regardless of throttling.
+    /// </summary>
+    public int MinParallelism { get; set; } = 1;
+
+    /// <summary>
+    /// Parallelism increase amount per stabilization period. Default: 2.
+    /// </summary>
+    public int IncreaseRate { get; set; } = 2;
+
+    /// <summary>
+    /// Multiplier applied on throttle (0.1-0.9). Default: 0.5.
+    /// Halves parallelism on throttle.
+    /// </summary>
+    public double DecreaseFactor { get; set; } = 0.5;
+
+    /// <summary>
+    /// Successful batches required before considering increase. Default: 3.
+    /// Must also satisfy MinIncreaseInterval.
+    /// </summary>
+    public int StabilizationBatches { get; set; } = 3;
+
+    /// <summary>
+    /// Minimum time between parallelism increases. Default: 5 seconds.
+ /// Prevents rapid oscillation when batches complete quickly. + /// + public TimeSpan MinIncreaseInterval { get; set; } = TimeSpan.FromSeconds(5); + + /// + /// Multiplier for recovery phase (getting back to last-known-good). Default: 2.0. + /// Increases faster during recovery, slower when probing new territory. + /// + public double RecoveryMultiplier { get; set; } = 2.0; + + /// + /// TTL for lastKnownGood value. Default: 5 minutes. + /// Matches Microsoft's rolling window. Stale values are discarded. + /// + public TimeSpan LastKnownGoodTTL { get; set; } = TimeSpan.FromMinutes(5); + + /// + /// Idle period after which state resets. Default: 5 minutes. + /// Long-running integrations with gaps get fresh starts. + /// + public TimeSpan IdleResetPeriod { get; set; } = TimeSpan.FromMinutes(5); +} +``` + +--- + +## Interface + +```csharp +public interface IAdaptiveRateController +{ + /// + /// Gets the current recommended parallelism for a connection. + /// Also updates last activity timestamp and checks for idle reset. + /// + /// The connection to get parallelism for. + /// The ceiling (from RecommendedDegreesOfParallelism). + /// Current parallelism to use. + int GetParallelism(string connectionName, int maxParallelism); + + /// + /// Records successful batch completion. May increase parallelism if stable. + /// + /// The connection that succeeded. + void RecordSuccess(string connectionName); + + /// + /// Records throttle event. Reduces parallelism. + /// + /// The connection that was throttled. + /// The Retry-After duration from server. + void RecordThrottle(string connectionName, TimeSpan retryAfter); + + /// + /// Manually resets state for a connection. + /// + /// The connection to reset. + void Reset(string connectionName); + + /// + /// Gets current statistics for monitoring/logging. + /// + /// The connection to get stats for. + /// Current statistics. 
+ AdaptiveRateStatistics GetStatistics(string connectionName); +} + +public record AdaptiveRateStatistics +{ + public required string ConnectionName { get; init; } + public required int CurrentParallelism { get; init; } + public required int MaxParallelism { get; init; } + public required int LastKnownGoodParallelism { get; init; } + public required bool IsLastKnownGoodStale { get; init; } + public required int SuccessesSinceThrottle { get; init; } + public required int TotalThrottleEvents { get; init; } + public required DateTime? LastThrottleTime { get; init; } + public required DateTime? LastIncreaseTime { get; init; } + public required DateTime LastActivityTime { get; init; } +} +``` + +--- + +## Integration Points + +### BulkOperationExecutor Changes + +Replace fixed parallelism with chunked adaptive execution: + +```csharp +// Current (fixed parallelism) +var parallelism = await ResolveParallelismAsync(options.MaxParallelBatches, ct); +await Parallel.ForEachAsync(batches, new ParallelOptions { MaxDegreeOfParallelism = parallelism }, ...); + +// New (adaptive parallelism) +var maxParallelism = await GetMaxParallelismAsync(ct); +var batchQueue = new Queue>(batches); + +while (batchQueue.Count > 0) +{ + var connectionName = GetPrimaryConnectionName(); + var parallelism = _rateController.GetParallelism(connectionName, maxParallelism); + + // Dequeue up to 'parallelism' batches for this chunk + var chunk = DequeueChunk(batchQueue, parallelism); + + // Process chunk with current parallelism + var results = await ProcessChunkAsync(chunk, parallelism, ct); + + // Update rate controller based on results + foreach (var result in results) + { + if (result.WasThrottled) + _rateController.RecordThrottle(result.ConnectionName, result.RetryAfter); + else + _rateController.RecordSuccess(result.ConnectionName); + } + + // Log current state + var stats = _rateController.GetStatistics(connectionName); + _logger.LogDebug( + "Adaptive rate: {Current}/{Max} parallelism, 
{Successes} since throttle, {Total} total throttles", + stats.CurrentParallelism, stats.MaxParallelism, + stats.SuccessesSinceThrottle, stats.TotalThrottleEvents); +} +``` + +### Dependency Injection + +```csharp +services.AddSingleton(); +services.Configure(configuration.GetSection("Dataverse:AdaptiveRate")); +``` + +### Coordination with ThrottleTracker + +| Component | Responsibility | Scope | +|-----------|----------------|-------| +| `ThrottleTracker` | Binary: Is connection throttled? | Connection selection | +| `AdaptiveRateController` | Continuous: What parallelism? | Batch execution | + +They work together but don't duplicate: +- ThrottleTracker prevents using throttled connections +- AdaptiveRateController optimizes parallelism to avoid throttling + +--- + +## Example Scenario + +``` +Initial state: + maxParallelism = 52 (from server) + currentParallelism = 26 (50% of 52) + +Time 0:00 - Batch 1-3 succeed + successesSinceThrottle = 3 + MinIncreaseInterval not yet passed (< 5s) + → No increase yet + +Time 0:05 - Batch 4 succeeds + successesSinceThrottle = 4, interval passed + → Increase to 28 (probing: +2) + +Time 0:10 - Batch 5-7 succeed + successesSinceThrottle = 3, interval passed + → Increase to 30 + +...continues ramping... + +Time 1:00 - At parallelism 44, THROTTLE received + lastKnownGoodParallelism = 42 (44 - 2) + currentParallelism = 22 (44 × 0.5) + successesSinceThrottle = 0 + +Time 1:05 - Throttle clears, Batch resumes, succeeds + successesSinceThrottle = 1 + +Time 1:15 - 3 more successes, interval passed + currentParallelism < lastKnownGoodParallelism (22 < 42) + → Fast recovery: increase to 26 (+4, using 2× multiplier) + +Time 1:20 - 3 more successes, interval passed + → Fast recovery: increase to 30 + +...fast recovery continues... 
+
+Time 1:45 - Reached lastKnownGood (42)
+  → Switch to probing: increase to 44 (+2)
+
+Time 1:50 - 3 more successes
+  → Probing: increase to 46
+
+Time 6:50 - No activity for 5 minutes
+  → Idle reset: currentParallelism = 26, start fresh
+```
+
+---
+
+## File Changes
+
+| File | Change |
+|------|--------|
+| `src/PPDS.Dataverse/Resilience/IAdaptiveRateController.cs` | New interface |
+| `src/PPDS.Dataverse/Resilience/AdaptiveRateController.cs` | New implementation |
+| `src/PPDS.Dataverse/Resilience/AdaptiveRateOptions.cs` | New configuration |
+| `src/PPDS.Dataverse/Resilience/AdaptiveRateStatistics.cs` | New statistics record |
+| `src/PPDS.Dataverse/DependencyInjection/DataverseOptions.cs` | Add `AdaptiveRate` section |
+| `src/PPDS.Dataverse/DependencyInjection/ServiceCollectionExtensions.cs` | Register controller |
+| `src/PPDS.Dataverse/BulkOperations/BulkOperationExecutor.cs` | Chunked adaptive execution |
+| `tests/PPDS.Dataverse.Tests/Resilience/AdaptiveRateControllerTests.cs` | Unit tests |
+| `docs/adr/0004_THROTTLE_RECOVERY_STRATEGY.md` | Update to reference this spec |
+
+---
+
+## Testing Strategy
+
+### Unit Tests
+
+1. **Initialization** - Verify initial parallelism is factor of max
+2. **Increase logic** - Verify stabilization batches AND time interval required
+3. **Decrease logic** - Verify multiplicative decrease on throttle
+4. **Fast recovery** - Verify 2× increase rate when below lastKnownGood
+5. **Probing** - Verify 1× increase rate when above lastKnownGood
+6. **TTL expiry** - Verify stale lastKnownGood is ignored
+7. **Idle reset** - Verify state resets after idle period
+8. **Thread safety** - Verify concurrent access is safe
+9. **Min/max bounds** - Verify parallelism stays within bounds
+
+### Integration Tests
+
+1. **Simulated throttle scenario** - Verify recovery behavior
+2. **Long-running simulation** - Verify TTL and idle reset
+3. 
**Multi-connection** - Verify per-connection state isolation + +--- + +## Future Enhancements + +1. **Per-entity-type tracking** - Some entities have heavier plugins +2. **Predictive adjustment** - Learn patterns over time +3. **Telemetry integration** - Expose metrics for monitoring dashboards +4. **Circuit breaker** - Stop entirely if too many throttles in window + +--- + +## References + +- [Service Protection API Limits](https://learn.microsoft.com/en-us/power-apps/developer/data-platform/api-limits) +- [Maximize API Throughput](https://learn.microsoft.com/en-us/dynamics365/fin-ops-core/dev-itpro/data-entities/service-protection-maximizing-api-throughput) +- [TCP Congestion Control (AIMD)](https://en.wikipedia.org/wiki/Additive_increase/multiplicative_decrease) +- ADR-0004: Throttle Recovery Strategy diff --git a/docs/architecture/BULK_OPERATIONS_PATTERNS.md b/docs/architecture/BULK_OPERATIONS_PATTERNS.md index 5b19fa70..09dbbf3b 100644 --- a/docs/architecture/BULK_OPERATIONS_PATTERNS.md +++ b/docs/architecture/BULK_OPERATIONS_PATTERNS.md @@ -105,16 +105,27 @@ Only use bypass options when: ## Batching -Records are automatically batched (default: 1000 per request). Adjust for your scenario: +Records are automatically batched. 
Adjust for your scenario: ```csharp -// Smaller batches for complex records +// Recommended for most scenarios (aligns with Microsoft guidance and our benchmarks) new BulkOperationOptions { BatchSize = 100 } -// Max batch for simple records +// Maximum batch for simple records with no plugins new BulkOperationOptions { BatchSize = 1000 } ``` +### Batch Size Guidance + +| Scenario | Recommended Size | Rationale | +|----------|------------------|-----------| +| Elastic tables | 100 | Microsoft recommendation; no transaction benefit from larger batches | +| Standard tables with plugins | 100 | Reduces timeout risk; more granular parallelism | +| Standard tables, no plugins | 100-1000 | Our benchmarks show 100 is 3% faster than 1000 | +| Complex records (many columns) | 50-100 | Reduces payload size and timeout risk | + +> **Note:** Our benchmarks showed batch size 100 outperformed 1000 by 3%. Microsoft recommends starting with 100 and using higher parallelism rather than larger batches. + ## Upsert Pattern Use alternate keys for upsert operations: @@ -137,6 +148,37 @@ var accounts = externalData.Select(d => new Entity("account") await _bulk.UpsertMultipleAsync("account", accounts); ``` +## UpsertMultiple Pitfalls + +### Duplicate Key Error with Alternate Keys + +When using `UpsertMultiple` with alternate keys, set the key column in `KeyAttributes` ONLY. Do not also set it in `Attributes`. 
+ +```csharp +// ✅ Correct - key column only in KeyAttributes +var entity = new Entity("account"); +entity.KeyAttributes["accountnumber"] = "ACCT-001"; +entity["name"] = "Contoso"; +entity["telephone1"] = "555-1234"; + +// ❌ Wrong - causes "An item with the same key has already been added" +var entity = new Entity("account"); +entity.KeyAttributes["accountnumber"] = "ACCT-001"; +entity["accountnumber"] = "ACCT-001"; // DO NOT SET THIS +entity["name"] = "Contoso"; +``` + +**Why it happens:** Dataverse's `ClassifyEntitiesForUpdateAndCreateV2` processor copies `KeyAttributes` values into `Attributes` internally. When the attribute already exists, `Dictionary.Insert` throws a duplicate key exception. + +**Symptoms:** +- Error: `An item with the same key has already been added` +- Stack trace includes `UpsertMultipleProcessor.ClassifyEntitiesForUpdateAndCreateV2` +- ALL batches fail (not just some), even though records are unique + +**Sources:** +- [Power Platform Community Thread](https://community.powerplatform.com/forums/thread/details/?threadid=b86c1b19-3f91-ef11-ac21-6045bdd3c2dc) +- [Microsoft Docs: Bulk Operations](https://learn.microsoft.com/en-us/power-apps/developer/data-platform/bulk-operations) + ## Parallel Bulk Operations For very large datasets, parallelize across connections: @@ -152,3 +194,17 @@ var results = await Task.WhenAll(tasks); var totalSuccess = results.Sum(r => r.SuccessCount); var totalFailed = results.Sum(r => r.FailureCount); ``` + +### Parallelism Guidance + +Microsoft recommends using the server-provided degree of parallelism via `RecommendedDegreesOfParallelism` or the `x-ms-dop-hint` response header. Performance degrades if you exceed this value. + +> "The number and capabilities of servers allocated may vary over time, so there's no fixed number for optimum degree of parallelism." 
+> — [Send parallel requests (Microsoft Learn)](https://learn.microsoft.com/en-us/power-apps/developer/data-platform/send-parallel-requests) + +## References + +- [Optimize performance for bulk operations](https://learn.microsoft.com/en-us/power-apps/developer/data-platform/optimize-performance-create-update) +- [Use bulk operation messages](https://learn.microsoft.com/en-us/power-apps/developer/data-platform/bulk-operations) +- [Send parallel requests](https://learn.microsoft.com/en-us/power-apps/developer/data-platform/send-parallel-requests) +- [Service protection API limits](https://learn.microsoft.com/en-us/power-apps/developer/data-platform/api-limits) diff --git a/docs/architecture/CONNECTION_POOLING_PATTERNS.md b/docs/architecture/CONNECTION_POOLING_PATTERNS.md index 7ed67fed..d31f4a82 100644 --- a/docs/architecture/CONNECTION_POOLING_PATTERNS.md +++ b/docs/architecture/CONNECTION_POOLING_PATTERNS.md @@ -104,6 +104,30 @@ var tasks = accountIds.Select(async id => var results = await Task.WhenAll(tasks); ``` +### Using Server-Recommended Parallelism + +Microsoft provides a recommended degree of parallelism via the `x-ms-dop-hint` response header, exposed as `RecommendedDegreesOfParallelism` on the client. **Performance degrades if you exceed this value.** + +```csharp +await using var client = await _pool.GetClientAsync(); + +// Get server-recommended parallelism (typically varies by environment) +int recommendedDop = client.RecommendedDegreesOfParallelism; + +var parallelOptions = new ParallelOptions +{ + MaxDegreeOfParallelism = recommendedDop +}; + +await Parallel.ForEachAsync(records, parallelOptions, async (record, ct) => +{ + await using var innerClient = await _pool.GetClientAsync(); + await innerClient.CreateAsync(record, ct); +}); +``` + +> **Note:** The recommended value varies based on environment resources and current load. Always query it dynamically rather than hardcoding. 
+ ## Scaling Pattern For high-throughput scenarios, use multiple Application Users: @@ -151,3 +175,23 @@ _logger.LogInformation( | `MaxLifetime` | 30m | Recycle connections after | | `DisableAffinityCookie` | true | Distribute across backend nodes | | `SelectionStrategy` | ThrottleAware | How to pick connections | + +## Performance Settings Applied Automatically + +The connection pool automatically applies Microsoft's recommended performance settings: + +```csharp +ThreadPool.SetMinThreads(100, 100); // Default is 4 +ServicePointManager.DefaultConnectionLimit = 65000; // Default is 2 +ServicePointManager.Expect100Continue = false; +ServicePointManager.UseNagleAlgorithm = false; +``` + +These are applied once when the first pool is created. If you bypass the pool and create `ServiceClient` instances directly, you must apply these settings manually. + +## References + +- [Send parallel requests](https://learn.microsoft.com/en-us/power-apps/developer/data-platform/send-parallel-requests) - Parallelism patterns and `x-ms-dop-hint` +- [Optimize performance for bulk operations](https://learn.microsoft.com/en-us/power-apps/developer/data-platform/optimize-performance-create-update) - Connection optimization settings +- [Service protection API limits](https://learn.microsoft.com/en-us/power-apps/developer/data-platform/api-limits) - Throttling thresholds +- [Scaling Dynamics 365 CRM Integrations in Azure](https://techcommunity.microsoft.com/blog/microsoftmissioncriticalblog/scaling-dynamics-365-crm-integrations-in-azure-the-right-way-to-use-the-sdk-s/4447143) - Clone pattern and anti-patterns diff --git a/docs/architecture/MULTI_ENVIRONMENT_SPEC.md b/docs/architecture/MULTI_ENVIRONMENT_SPEC.md new file mode 100644 index 00000000..47e1258b --- /dev/null +++ b/docs/architecture/MULTI_ENVIRONMENT_SPEC.md @@ -0,0 +1,815 @@ +# Multi-Environment Configuration & Live Migration - Design Specification + +**Status:** Approved for Implementation +**Target:** Future release (phased) 
+**Author:** Claude Code +**Date:** 2025-12-23 + +--- + +## Problem Statement + +Current configuration assumes a single Dataverse environment: + +```json +{ + "Dataverse": { + "Url": "https://org.crm.dynamics.com", + "Connections": [...] + } +} +``` + +This works for single-environment operations but doesn't support: +- Multiple named environments (Dev, Test, Prod) +- Data migration between environments +- Environment-specific connection configurations + +--- + +## Solution Overview + +### Phase 1: Multi-Environment Configuration +Enable named environments in configuration, each with its own URL and connections. + +### Phase 2: Live Migration - Simple Cases +Direct source-to-target data transfer for single entities without transformations. + +### Phase 3: Live Migration - Advanced +Dependency ordering, transformations, data masking, and resume capability. + +--- + +## Phase 1: Multi-Environment Configuration + +### Configuration Model + +```csharp +public class DataverseOptions +{ + /// + /// Named environment configurations. + /// If not specified, root-level Url/Connections are treated as single "Default" environment. + /// + public Dictionary? Environments { get; set; } + + /// + /// Default environment name for operations that don't specify one. + /// + public string DefaultEnvironment { get; set; } = "Default"; + + #region Single-Environment Shorthand (Backwards Compatible) + + /// + /// Dataverse URL for single-environment configuration. + /// Ignored if Environments is specified. + /// + public string? Url { get; set; } + + /// + /// Tenant ID for single-environment configuration. + /// Ignored if Environments is specified. + /// + public string? TenantId { get; set; } + + /// + /// Connections for single-environment configuration. + /// Ignored if Environments is specified. + /// + public List? Connections { get; set; } + + #endregion + + /// + /// Pool options (shared across all environments). 
+ /// + public PoolOptions Pool { get; set; } = new(); + + /// + /// Adaptive rate control options (per-environment state, shared config). + /// + public AdaptiveRateOptions AdaptiveRate { get; set; } = new(); +} + +public class DataverseEnvironmentOptions +{ + /// + /// Dataverse environment URL. + /// + public required string Url { get; set; } + + /// + /// Azure AD tenant ID. + /// + public string? TenantId { get; set; } + + /// + /// Application User connections for this environment. + /// + public List Connections { get; set; } = new(); + + /// + /// Optional description for documentation. + /// + public string? Description { get; set; } +} +``` + +### Configuration Examples + +#### Multi-Environment + +```json +{ + "Dataverse": { + "DefaultEnvironment": "Development", + "Environments": { + "Production": { + "Description": "Production environment - handle with care", + "Url": "https://prod-org.crm.dynamics.com", + "TenantId": "00000000-0000-0000-0000-000000000000", + "Connections": [ + { + "Name": "Primary", + "ClientId": "prod-client-1", + "ClientSecretKeyVaultUri": "https://vault.azure.net/secrets/prod-primary" + }, + { + "Name": "Secondary", + "ClientId": "prod-client-2", + "ClientSecretKeyVaultUri": "https://vault.azure.net/secrets/prod-secondary" + } + ] + }, + "Development": { + "Description": "Development environment", + "Url": "https://dev-org.crm.dynamics.com", + "TenantId": "00000000-0000-0000-0000-000000000000", + "Connections": [ + { + "Name": "Primary", + "ClientId": "dev-client", + "ClientSecretVariable": "DEV_DATAVERSE_SECRET" + } + ] + }, + "UAT": { + "Description": "User acceptance testing", + "Url": "https://uat-org.crm.dynamics.com", + "TenantId": "00000000-0000-0000-0000-000000000000", + "Connections": [ + { + "Name": "Primary", + "ClientId": "uat-client", + "ClientSecretKeyVaultUri": "https://vault.azure.net/secrets/uat-primary" + } + ] + } + } + } +} +``` + +#### Single-Environment (Backwards Compatible) + +```json +{ + "Dataverse": { + 
"Url": "https://org.crm.dynamics.com", + "TenantId": "00000000-0000-0000-0000-000000000000", + "Connections": [ + { + "Name": "Primary", + "ClientId": "...", + "ClientSecretVariable": "DATAVERSE_SECRET" + } + ] + } +} +``` + +Internally treated as: + +```json +{ + "Dataverse": { + "DefaultEnvironment": "Default", + "Environments": { + "Default": { + "Url": "https://org.crm.dynamics.com", + "TenantId": "...", + "Connections": [...] + } + } + } +} +``` + +### Environment Resolution + +```csharp +internal class EnvironmentResolver +{ + private readonly DataverseOptions _options; + private readonly Dictionary _environments; + + public EnvironmentResolver(DataverseOptions options) + { + _options = options; + _environments = ResolveEnvironments(options); + } + + private static Dictionary ResolveEnvironments(DataverseOptions options) + { + // If Environments is specified, use it directly + if (options.Environments != null && options.Environments.Count > 0) + { + return options.Environments; + } + + // Otherwise, create implicit "Default" environment from root properties + if (string.IsNullOrEmpty(options.Url)) + { + throw new ConfigurationException("Either Environments or Url must be specified"); + } + + return new Dictionary + { + ["Default"] = new DataverseEnvironmentOptions + { + Url = options.Url, + TenantId = options.TenantId, + Connections = options.Connections ?? new List() + } + }; + } + + public DataverseEnvironmentOptions GetEnvironment(string? name = null) + { + var envName = name ?? _options.DefaultEnvironment; + + if (!_environments.TryGetValue(envName, out var env)) + { + var available = string.Join(", ", _environments.Keys); + throw new ConfigurationException( + $"Environment '{envName}' not found. 
Available: {available}"); + } + + return env; + } + + public IEnumerable GetEnvironmentNames() => _environments.Keys; +} +``` + +### CLI Usage + +```bash +# Uses DefaultEnvironment +ppds-migrate export --entity account --output ./data + +# Explicit environment +ppds-migrate export --env Production --entity account --output ./data + +# List configured environments +ppds-migrate environments list + +# Show environment details +ppds-migrate environments show Production +``` + +--- + +## Phase 2: Live Migration - Simple Cases + +### Overview + +Direct data transfer: Source → Memory Buffer → Target + +No intermediate files, streaming when possible. + +### CLI Usage + +```bash +# Basic live migration +ppds-migrate live --source Production --target Development --entity account + +# Multiple entities +ppds-migrate live --source Production --target Development \ + --entity account,contact,opportunity + +# With filtering +ppds-migrate live --source Production --target Development \ + --entity account \ + --filter "modifiedon gt 2024-01-01" + +# Limit records (for testing) +ppds-migrate live --source Production --target Development \ + --entity account \ + --top 100 + +# Batch configuration +ppds-migrate live --source Production --target Development \ + --entity account \ + --batch-size 100 \ + --max-parallel-batches 10 +``` + +### Architecture + +``` +┌─────────────────────────────────────────────────────────────────────┐ +│ Live Migration Pipeline │ +├─────────────────────────────────────────────────────────────────────┤ +│ │ +│ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │ +│ │ Source │ │ Buffer │ │ Target │ │ +│ │ Pool │───▶│ Channel │───▶│ Pool │ │ +│ │ │ │ │ │ │ │ +│ │ • Read │ │ • Bounded │ │ • Write │ │ +│ │ • Throttle │ │ • Backpress │ │ • Throttle │ │ +│ │ • Adaptive │ │ • Batch │ │ • Adaptive │ │ +│ └─────────────┘ └─────────────┘ └─────────────┘ │ +│ │ │ │ │ +│ ▼ ▼ ▼ │ +│ ┌─────────────────────────────────────────────────────┐ │ +│ │ Progress Reporter │ │ +│ │ 
• Records read/written │ │ +│ │ • Throughput (records/sec) │ │ +│ │ • Errors & retries │ │ +│ │ • ETA │ │ +│ └─────────────────────────────────────────────────────┘ │ +│ │ +└─────────────────────────────────────────────────────────────────────┘ +``` + +### Core Components + +```csharp +public interface ILiveMigrationService +{ + /// + /// Migrates data from source to target environment. + /// + Task MigrateAsync( + LiveMigrationOptions options, + IProgress? progress = null, + CancellationToken cancellationToken = default); +} + +public class LiveMigrationOptions +{ + /// + /// Source environment name. + /// + public required string SourceEnvironment { get; set; } + + /// + /// Target environment name. + /// + public required string TargetEnvironment { get; set; } + + /// + /// Entities to migrate. + /// + public required List Entities { get; set; } + + /// + /// Optional FetchXML filter condition. + /// + public string? Filter { get; set; } + + /// + /// Maximum records to migrate (0 = unlimited). + /// + public int TopCount { get; set; } = 0; + + /// + /// Batch size for bulk operations. + /// + public int BatchSize { get; set; } = 100; + + /// + /// Maximum parallel batches for write operations. + /// + public int MaxParallelBatches { get; set; } = 10; + + /// + /// Buffer capacity (batches). Controls backpressure. + /// + public int BufferCapacity { get; set; } = 5; + + /// + /// Operation mode for existing records. + /// + public MigrationMode Mode { get; set; } = MigrationMode.Upsert; +} + +public enum MigrationMode +{ + /// + /// Create only - fail if record exists. + /// + Create, + + /// + /// Update only - fail if record doesn't exist. + /// + Update, + + /// + /// Upsert - create or update as needed. 
+ /// + Upsert +} + +public record LiveMigrationProgress +{ + public required string Entity { get; init; } + public required int RecordsRead { get; init; } + public required int RecordsWritten { get; init; } + public required int RecordsFailed { get; init; } + public required int TotalRecords { get; init; } + public required double RecordsPerSecond { get; init; } + public required TimeSpan Elapsed { get; init; } + public required TimeSpan? EstimatedRemaining { get; init; } + public required string? CurrentOperation { get; init; } +} + +public record LiveMigrationResult +{ + public required bool Success { get; init; } + public required int TotalRecordsRead { get; init; } + public required int TotalRecordsWritten { get; init; } + public required int TotalRecordsFailed { get; init; } + public required TimeSpan Duration { get; init; } + public required List EntityResults { get; init; } + public required List Errors { get; init; } +} + +public record LiveMigrationEntityResult +{ + public required string Entity { get; init; } + public required int RecordsRead { get; init; } + public required int RecordsWritten { get; init; } + public required int RecordsFailed { get; init; } + public required TimeSpan Duration { get; init; } +} + +public record LiveMigrationError +{ + public required string Entity { get; init; } + public required Guid? RecordId { get; init; } + public required string Message { get; init; } + public required string? ErrorCode { get; init; } +} +``` + +### Pipeline Implementation + +```csharp +internal class LiveMigrationPipeline +{ + private readonly IDataverseConnectionPool _sourcePool; + private readonly IDataverseConnectionPool _targetPool; + private readonly IBulkOperationExecutor _executor; + private readonly ILogger _logger; + + public async Task ExecuteAsync( + LiveMigrationOptions options, + IProgress? 
progress, + CancellationToken cancellationToken) + { + var results = new List(); + var errors = new List(); + var stopwatch = Stopwatch.StartNew(); + + foreach (var entity in options.Entities) + { + var entityResult = await MigrateEntityAsync( + entity, options, progress, errors, cancellationToken); + results.Add(entityResult); + } + + return new LiveMigrationResult + { + Success = errors.Count == 0, + TotalRecordsRead = results.Sum(r => r.RecordsRead), + TotalRecordsWritten = results.Sum(r => r.RecordsWritten), + TotalRecordsFailed = results.Sum(r => r.RecordsFailed), + Duration = stopwatch.Elapsed, + EntityResults = results, + Errors = errors + }; + } + + private async Task MigrateEntityAsync( + string entity, + LiveMigrationOptions options, + IProgress? progress, + List errors, + CancellationToken cancellationToken) + { + var stopwatch = Stopwatch.StartNew(); + var recordsRead = 0; + var recordsWritten = 0; + var recordsFailed = 0; + + // Bounded channel for backpressure + var channel = Channel.CreateBounded>( + new BoundedChannelOptions(options.BufferCapacity) + { + FullMode = BoundedChannelFullMode.Wait, + SingleReader = false, + SingleWriter = true + }); + + // Producer: Read from source + var readTask = Task.Run(async () => + { + try + { + await foreach (var batch in ReadBatchesAsync(entity, options, cancellationToken)) + { + recordsRead += batch.Count; + await channel.Writer.WriteAsync(batch, cancellationToken); + + progress?.Report(new LiveMigrationProgress + { + Entity = entity, + RecordsRead = recordsRead, + RecordsWritten = recordsWritten, + RecordsFailed = recordsFailed, + TotalRecords = 0, // Unknown until complete + RecordsPerSecond = recordsRead / stopwatch.Elapsed.TotalSeconds, + Elapsed = stopwatch.Elapsed, + EstimatedRemaining = null, + CurrentOperation = "Reading" + }); + } + } + finally + { + channel.Writer.Complete(); + } + }, cancellationToken); + + // Consumer: Write to target + var writeTask = Task.Run(async () => + { + await foreach (var 
batch in channel.Reader.ReadAllAsync(cancellationToken)) + { + var result = await WriteBatchAsync(entity, batch, options, cancellationToken); + recordsWritten += result.SuccessCount; + recordsFailed += result.FailureCount; + + foreach (var error in result.Errors) + { + errors.Add(new LiveMigrationError + { + Entity = entity, + RecordId = error.RecordId, + Message = error.Message, + ErrorCode = error.ErrorCode?.ToString() + }); + } + + progress?.Report(new LiveMigrationProgress + { + Entity = entity, + RecordsRead = recordsRead, + RecordsWritten = recordsWritten, + RecordsFailed = recordsFailed, + TotalRecords = recordsRead, // Updated as we read + RecordsPerSecond = recordsWritten / stopwatch.Elapsed.TotalSeconds, + Elapsed = stopwatch.Elapsed, + EstimatedRemaining = EstimateRemaining(recordsRead, recordsWritten, stopwatch.Elapsed), + CurrentOperation = "Writing" + }); + } + }, cancellationToken); + + await Task.WhenAll(readTask, writeTask); + + return new LiveMigrationEntityResult + { + Entity = entity, + RecordsRead = recordsRead, + RecordsWritten = recordsWritten, + RecordsFailed = recordsFailed, + Duration = stopwatch.Elapsed + }; + } + + private async IAsyncEnumerable> ReadBatchesAsync( + string entity, + LiveMigrationOptions options, + [EnumeratorCancellation] CancellationToken cancellationToken) + { + await using var client = await _sourcePool.GetClientAsync(cancellationToken: cancellationToken); + + var query = BuildQuery(entity, options); + var batch = new List(options.BatchSize); + + // Paging through results + string? 
pagingCookie = null; + var moreRecords = true; + + while (moreRecords && !cancellationToken.IsCancellationRequested) + { + query.PageInfo = new PagingInfo + { + Count = options.BatchSize, + PagingCookie = pagingCookie, + ReturnTotalRecordCount = false + }; + + var response = await client.RetrieveMultipleAsync(query, cancellationToken); + + if (response.Entities.Count > 0) + { + yield return response.Entities.ToList(); + } + + moreRecords = response.MoreRecords; + pagingCookie = response.PagingCookie; + } + } + + private async Task WriteBatchAsync( + string entity, + List batch, + LiveMigrationOptions options, + CancellationToken cancellationToken) + { + var bulkOptions = new BulkOperationOptions + { + BatchSize = options.BatchSize, + MaxParallelBatches = options.MaxParallelBatches + }; + + return options.Mode switch + { + MigrationMode.Create => await _executor.CreateMultipleAsync(batch, bulkOptions, cancellationToken), + MigrationMode.Update => await _executor.UpdateMultipleAsync(batch, bulkOptions, cancellationToken), + MigrationMode.Upsert => await _executor.UpsertMultipleAsync(batch, bulkOptions, cancellationToken), + _ => throw new NotSupportedException($"Mode '{options.Mode}' not supported") + }; + } +} +``` + +### Backpressure Handling + +The bounded channel naturally handles backpressure: + +``` +Source reads fast, target throttled: + → Channel fills to BufferCapacity + → channel.Writer.WriteAsync blocks + → Source slows down automatically + → No memory explosion + +Target catches up: + → Channel has space + → Source resumes reading + → Pipeline flows smoothly +``` + +--- + +## Phase 3: Live Migration - Advanced (Future) + +### Dependency Ordering + +```csharp +public class LiveMigrationOptions +{ + // ... existing properties ... + + /// + /// Automatically order entities by dependencies. + /// Parents migrated before children. 
+ /// + public bool AutoOrderByDependencies { get; set; } = true; +} +``` + +Analysis: Account → Contact → Opportunity (Contact.ParentCustomerId references Account) + +### Transformations + +```csharp +public class LiveMigrationOptions +{ + // ... existing properties ... + + /// + /// Transformations to apply during migration. + /// + public List? Transformations { get; set; } +} + +public interface IRecordTransformation +{ + /// + /// Applies transformation to a record. + /// Return null to skip the record. + /// + Entity? Transform(Entity record, TransformationContext context); +} + +// Built-in transformations +public class FieldMappingTransformation : IRecordTransformation { } +public class DataMaskingTransformation : IRecordTransformation { } +public class LookupRemappingTransformation : IRecordTransformation { } +public class ExcludeFieldsTransformation : IRecordTransformation { } +``` + +### Data Masking (PII Protection) + +```json +{ + "Transformations": [ + { + "Type": "DataMasking", + "Rules": [ + { "Field": "emailaddress1", "Method": "Email" }, + { "Field": "telephone1", "Method": "Phone" }, + { "Field": "address1_line1", "Method": "Redact" } + ] + } + ] +} +``` + +### Resume Capability + +```csharp +public class LiveMigrationOptions +{ + // ... existing properties ... + + /// + /// Checkpoint file for resume capability. + /// + public string? CheckpointFile { get; set; } + + /// + /// Resume from previous checkpoint if available. 
+ /// + public bool Resume { get; set; } = false; +} +``` + +```bash +# Start migration with checkpoint +ppds-migrate live --source Prod --target Dev \ + --entity account,contact \ + --checkpoint ./migration.checkpoint + +# Resume after failure +ppds-migrate live --source Prod --target Dev \ + --entity account,contact \ + --checkpoint ./migration.checkpoint \ + --resume +``` + +--- + +## File Changes + +### Phase 1 + +| File | Change | +|------|--------| +| `src/PPDS.Dataverse/DependencyInjection/DataverseOptions.cs` | Add Environments property | +| `src/PPDS.Dataverse/DependencyInjection/DataverseEnvironmentOptions.cs` | New class | +| `src/PPDS.Dataverse/Configuration/EnvironmentResolver.cs` | New class | +| `src/PPDS.Migration.Cli/Commands/EnvironmentsCommand.cs` | New command | + +### Phase 2 + +| File | Change | +|------|--------| +| `src/PPDS.Migration/Live/ILiveMigrationService.cs` | New interface | +| `src/PPDS.Migration/Live/LiveMigrationService.cs` | New implementation | +| `src/PPDS.Migration/Live/LiveMigrationPipeline.cs` | New pipeline | +| `src/PPDS.Migration/Live/LiveMigrationOptions.cs` | New options | +| `src/PPDS.Migration.Cli/Commands/LiveCommand.cs` | New command | + +### Phase 3 + +| File | Change | +|------|--------| +| `src/PPDS.Migration/Transformations/` | New transformation classes | +| `src/PPDS.Migration/Live/CheckpointManager.cs` | New checkpoint handling | +| `src/PPDS.Migration/Analysis/DependencyAnalyzer.cs` | New dependency analysis | + +--- + +## References + +- [Dataverse Bulk Operations](https://learn.microsoft.com/en-us/power-apps/developer/data-platform/bulk-operations) +- [System.Threading.Channels](https://learn.microsoft.com/en-us/dotnet/core/extensions/channels) +- [Producer-Consumer Patterns](https://learn.microsoft.com/en-us/dotnet/standard/parallel-programming/how-to-implement-a-producer-consumer-dataflow-pattern) diff --git a/docs/architecture/STRUCTURED_CONFIGURATION_SPEC.md 
b/docs/architecture/STRUCTURED_CONFIGURATION_SPEC.md new file mode 100644 index 00000000..4485a45a --- /dev/null +++ b/docs/architecture/STRUCTURED_CONFIGURATION_SPEC.md @@ -0,0 +1,680 @@ +# Structured Configuration - Design Specification + +**Status:** Approved for Implementation +**Target:** Future release +**Author:** Claude Code +**Date:** 2025-12-23 + +--- + +## Problem Statement + +Current configuration requires raw connection strings: + +```json +{ + "Dataverse": { + "Connections": [ + { + "Name": "Primary", + "ConnectionString": "AuthType=ClientSecret;Url=https://org.crm.dynamics.com;ClientId=xxx;ClientSecret=yyy;TenantId=zzz" + } + ] + } +} +``` + +### Issues + +| Problem | Impact | +|---------|--------| +| **Secrets in config** | Easy to commit to source control, visible in logs | +| **Error prone** | Syntax errors, typos, no validation until runtime | +| **Duplication** | Same Url/TenantId repeated across connections | +| **Hard to compose** | Can't override just URL for different environments | +| **No IntelliSense** | No IDE help, just a string | +| **No Key Vault support** | Must inline secrets or build custom resolution | + +The existence of `ConnectionStringRedactor` proves we're already fighting secret leakage. + +--- + +## Solution: Typed Configuration with Secret Resolution + +Replace connection strings with structured, typed configuration that: +- Separates secrets from config files +- Supports multiple authentication types +- Enables Key Vault and environment variable resolution +- Provides validation at startup +- Maintains backwards compatibility + +--- + +## Configuration Model + +### Root Options + +```csharp +public class DataverseOptions +{ + /// + /// Default Dataverse environment URL. Inherited by connections if not specified. + /// Example: https://org.crm.dynamics.com + /// + public string? Url { get; set; } + + /// + /// Default Azure AD tenant ID. Inherited by connections if not specified. + /// + public string? 
TenantId { get; set; } + + /// + /// Connection configurations for Application Users. + /// + public List Connections { get; set; } = new(); + + /// + /// Connection pool settings. + /// + public PoolOptions Pool { get; set; } = new(); + + /// + /// Adaptive rate control settings. + /// + public AdaptiveRateOptions AdaptiveRate { get; set; } = new(); +} +``` + +### Connection Options + +```csharp +public class DataverseConnectionOptions +{ + /// + /// Connection name for identification and logging. + /// + public required string Name { get; set; } + + /// + /// Dataverse environment URL. Overrides root Url if specified. + /// + public string? Url { get; set; } + + /// + /// Azure AD tenant ID. Overrides root TenantId if specified. + /// + public string? TenantId { get; set; } + + /// + /// Authentication type. Default: ClientSecret. + /// + public DataverseAuthType AuthType { get; set; } = DataverseAuthType.ClientSecret; + + /// + /// Azure AD application (client) ID. Required for ClientSecret and Certificate auth. + /// + public string? ClientId { get; set; } + + #region Secret Resolution (Priority Order) + + /// + /// Azure Key Vault URI for client secret. Highest priority. + /// Example: https://myvault.vault.azure.net/secrets/dataverse-secret + /// + public string? ClientSecretKeyVaultUri { get; set; } + + /// + /// Environment variable name containing client secret. Second priority. + /// Example: DATAVERSE_CLIENT_SECRET + /// + public string? ClientSecretVariable { get; set; } + + /// + /// Direct client secret value. Lowest priority. + /// NOT RECOMMENDED for production - use KeyVault or environment variable. + /// + [Obsolete("Use ClientSecretKeyVaultUri or ClientSecretVariable for production")] + public string? ClientSecret { get; set; } + + #endregion + + #region Certificate Auth + + /// + /// Certificate thumbprint. Required for Certificate auth. + /// + public string? CertificateThumbprint { get; set; } + + /// + /// Certificate store name. Default: My. 
+ /// + public StoreName CertificateStoreName { get; set; } = StoreName.My; + + /// + /// Certificate store location. Default: CurrentUser. + /// + public StoreLocation CertificateStoreLocation { get; set; } = StoreLocation.CurrentUser; + + /// + /// Path to PFX certificate file. Alternative to store-based certificate. + /// + public string? CertificatePath { get; set; } + + /// + /// Environment variable containing PFX password. + /// + public string? CertificatePasswordVariable { get; set; } + + #endregion + + #region OAuth (Interactive) + + /// + /// OAuth redirect URI. Required for OAuth auth. + /// + public string? RedirectUri { get; set; } + + /// + /// OAuth login prompt behavior. Default: Auto. + /// + public OAuthLoginPrompt LoginPrompt { get; set; } = OAuthLoginPrompt.Auto; + + #endregion + + /// + /// Raw connection string. Escape hatch for unsupported scenarios. + /// If set, overrides all other properties. + /// + public string? ConnectionString { get; set; } +} +``` + +### Authentication Types + +```csharp +public enum DataverseAuthType +{ + /// + /// App registration with client secret. Most common for server-to-server. + /// + ClientSecret, + + /// + /// App registration with certificate. More secure than client secret. + /// + Certificate, + + /// + /// Azure Managed Identity. Best for Azure-hosted services. No secrets needed. + /// + ManagedIdentity, + + /// + /// Interactive OAuth. For desktop apps, not recommended for servers. + /// + OAuth, + + /// + /// Raw connection string. Escape hatch for unsupported auth types. 
+ /// + ConnectionString +} + +public enum OAuthLoginPrompt +{ + Auto, + Always, + Never, + SelectAccount +} +``` + +--- + +## Configuration Examples + +### Development (Environment Variable) + +```json +{ + "Dataverse": { + "Url": "https://dev-org.crm.dynamics.com", + "TenantId": "00000000-0000-0000-0000-000000000000", + "Connections": [ + { + "Name": "Primary", + "ClientId": "11111111-1111-1111-1111-111111111111", + "ClientSecretVariable": "DATAVERSE_SECRET" + } + ] + } +} +``` + +Run with: `DATAVERSE_SECRET=my-secret dotnet run` + +### Production (Azure Key Vault) + +```json +{ + "Dataverse": { + "Url": "https://prod-org.crm.dynamics.com", + "TenantId": "00000000-0000-0000-0000-000000000000", + "Connections": [ + { + "Name": "Primary", + "ClientId": "11111111-1111-1111-1111-111111111111", + "ClientSecretKeyVaultUri": "https://myvault.vault.azure.net/secrets/dataverse-primary" + }, + { + "Name": "Secondary", + "ClientId": "22222222-2222-2222-2222-222222222222", + "ClientSecretKeyVaultUri": "https://myvault.vault.azure.net/secrets/dataverse-secondary" + } + ] + } +} +``` + +### Azure Functions (Managed Identity) + +```json +{ + "Dataverse": { + "Url": "https://org.crm.dynamics.com", + "Connections": [ + { + "Name": "Primary", + "AuthType": "ManagedIdentity" + } + ] + } +} +``` + +No secrets needed! Azure handles authentication. + +### Certificate Authentication + +```json +{ + "Dataverse": { + "Url": "https://org.crm.dynamics.com", + "TenantId": "00000000-0000-0000-0000-000000000000", + "Connections": [ + { + "Name": "Primary", + "AuthType": "Certificate", + "ClientId": "11111111-1111-1111-1111-111111111111", + "CertificateThumbprint": "ABC123DEF456...", + "CertificateStoreLocation": "LocalMachine" + } + ] + } +} +``` + +### Legacy (Raw Connection String) + +```json +{ + "Dataverse": { + "Connections": [ + { + "Name": "Primary", + "ConnectionString": "AuthType=ClientSecret;Url=https://org.crm.dynamics.com;ClientId=...;ClientSecret=...;TenantId=..." 
+ } + ] + } +} +``` + +Still supported for backwards compatibility and edge cases. + +--- + +## Connection String Builder + +```csharp +internal static class ConnectionStringBuilder +{ + public static string Build(DataverseConnectionOptions connection, DataverseOptions root) + { + // Escape hatch: raw connection string takes precedence + if (!string.IsNullOrEmpty(connection.ConnectionString)) + { + return connection.ConnectionString; + } + + // Inherit from root + var url = connection.Url ?? root.Url + ?? throw new ConfigurationException($"Url required for connection '{connection.Name}'"); + var tenantId = connection.TenantId ?? root.TenantId; + + return connection.AuthType switch + { + DataverseAuthType.ClientSecret => BuildClientSecret(url, tenantId, connection), + DataverseAuthType.Certificate => BuildCertificate(url, tenantId, connection), + DataverseAuthType.ManagedIdentity => BuildManagedIdentity(url), + DataverseAuthType.OAuth => BuildOAuth(url, connection), + DataverseAuthType.ConnectionString => connection.ConnectionString + ?? throw new ConfigurationException($"ConnectionString required for connection '{connection.Name}'"), + _ => throw new ConfigurationException($"Unsupported AuthType '{connection.AuthType}'") + }; + } + + private static string BuildClientSecret(string url, string? tenantId, DataverseConnectionOptions connection) + { + var clientId = connection.ClientId + ?? throw new ConfigurationException($"ClientId required for connection '{connection.Name}'"); + var secret = ResolveSecret(connection) + ?? 
throw new ConfigurationException($"Client secret required for connection '{connection.Name}'"); + + var sb = new StringBuilder(); + sb.Append($"AuthType=ClientSecret;Url={url};ClientId={clientId};ClientSecret={secret}"); + + if (!string.IsNullOrEmpty(tenantId)) + { + sb.Append($";TenantId={tenantId}"); + } + + return sb.ToString(); + } + + private static string BuildManagedIdentity(string url) + { + return $"AuthType=ManagedIdentity;Url={url}"; + } + + private static string BuildCertificate(string url, string? tenantId, DataverseConnectionOptions connection) + { + var clientId = connection.ClientId + ?? throw new ConfigurationException($"ClientId required for connection '{connection.Name}'"); + var thumbprint = connection.CertificateThumbprint + ?? throw new ConfigurationException($"CertificateThumbprint required for connection '{connection.Name}'"); + + var sb = new StringBuilder(); + sb.Append($"AuthType=Certificate;Url={url};ClientId={clientId};Thumbprint={thumbprint}"); + + if (!string.IsNullOrEmpty(tenantId)) + { + sb.Append($";TenantId={tenantId}"); + } + + sb.Append($";StoreLocation={connection.CertificateStoreLocation}"); + + return sb.ToString(); + } + + private static string BuildOAuth(string url, DataverseConnectionOptions connection) + { + var clientId = connection.ClientId + ?? throw new ConfigurationException($"ClientId required for connection '{connection.Name}'"); + var redirectUri = connection.RedirectUri + ?? throw new ConfigurationException($"RedirectUri required for connection '{connection.Name}'"); + + return $"AuthType=OAuth;Url={url};ClientId={clientId};RedirectUri={redirectUri};LoginPrompt={connection.LoginPrompt}"; + } +} +``` + +--- + +## Secret Resolution + +```csharp +internal static class SecretResolver +{ + public static string? 
ResolveSecret(DataverseConnectionOptions connection) + { + // Priority 1: Azure Key Vault + if (!string.IsNullOrEmpty(connection.ClientSecretKeyVaultUri)) + { + return ResolveFromKeyVault(connection.ClientSecretKeyVaultUri); + } + + // Priority 2: Environment Variable + if (!string.IsNullOrEmpty(connection.ClientSecretVariable)) + { + var value = Environment.GetEnvironmentVariable(connection.ClientSecretVariable); + if (string.IsNullOrEmpty(value)) + { + throw new ConfigurationException( + $"Environment variable '{connection.ClientSecretVariable}' not found or empty " + + $"for connection '{connection.Name}'"); + } + return value; + } + + // Priority 3: Direct value (not recommended) + #pragma warning disable CS0618 + return connection.ClientSecret; + #pragma warning restore CS0618 + } + + private static string ResolveFromKeyVault(string secretUri) + { + // Use Azure.Identity DefaultAzureCredential for Key Vault access + var credential = new DefaultAzureCredential(); + var client = new SecretClient(new Uri(GetVaultUri(secretUri)), credential); + + var secretName = GetSecretName(secretUri); + var secret = client.GetSecret(secretName); + + return secret.Value.Value; + } + + private static string GetVaultUri(string secretUri) + { + // https://myvault.vault.azure.net/secrets/mysecret -> https://myvault.vault.azure.net + var uri = new Uri(secretUri); + return $"{uri.Scheme}://{uri.Host}"; + } + + private static string GetSecretName(string secretUri) + { + // https://myvault.vault.azure.net/secrets/mysecret -> mysecret + var uri = new Uri(secretUri); + var segments = uri.AbsolutePath.Split('/'); + return segments[^1]; + } +} +``` + +--- + +## Validation + +```csharp +internal static class ConfigurationValidator +{ + public static void Validate(DataverseOptions options) + { + if (options.Connections.Count == 0) + { + throw new ConfigurationException("At least one connection must be configured"); + } + + var names = new HashSet<string>(StringComparer.OrdinalIgnoreCase); + foreach
(var connection in options.Connections) + { + ValidateConnection(connection, options, names); + } + } + + private static void ValidateConnection( + DataverseConnectionOptions connection, + DataverseOptions root, + HashSet names) + { + // Name required and unique + if (string.IsNullOrWhiteSpace(connection.Name)) + { + throw new ConfigurationException("Connection Name is required"); + } + + if (!names.Add(connection.Name)) + { + throw new ConfigurationException($"Duplicate connection name: '{connection.Name}'"); + } + + // Raw connection string bypasses other validation + if (!string.IsNullOrEmpty(connection.ConnectionString)) + { + return; + } + + // URL required (from connection or root) + if (string.IsNullOrEmpty(connection.Url) && string.IsNullOrEmpty(root.Url)) + { + throw new ConfigurationException($"Url required for connection '{connection.Name}'"); + } + + // Auth-type specific validation + switch (connection.AuthType) + { + case DataverseAuthType.ClientSecret: + ValidateClientSecret(connection); + break; + case DataverseAuthType.Certificate: + ValidateCertificate(connection); + break; + case DataverseAuthType.ManagedIdentity: + // No additional requirements + break; + case DataverseAuthType.OAuth: + ValidateOAuth(connection); + break; + } + } + + private static void ValidateClientSecret(DataverseConnectionOptions connection) + { + if (string.IsNullOrEmpty(connection.ClientId)) + { + throw new ConfigurationException($"ClientId required for connection '{connection.Name}'"); + } + + #pragma warning disable CS0618 + var hasSecret = !string.IsNullOrEmpty(connection.ClientSecretKeyVaultUri) + || !string.IsNullOrEmpty(connection.ClientSecretVariable) + || !string.IsNullOrEmpty(connection.ClientSecret); + #pragma warning restore CS0618 + + if (!hasSecret) + { + throw new ConfigurationException( + $"Client secret required for connection '{connection.Name}'. 
" + + "Use ClientSecretKeyVaultUri, ClientSecretVariable, or ClientSecret."); + } + } + + private static void ValidateCertificate(DataverseConnectionOptions connection) + { + if (string.IsNullOrEmpty(connection.ClientId)) + { + throw new ConfigurationException($"ClientId required for connection '{connection.Name}'"); + } + + var hasThumbprint = !string.IsNullOrEmpty(connection.CertificateThumbprint); + var hasPath = !string.IsNullOrEmpty(connection.CertificatePath); + + if (!hasThumbprint && !hasPath) + { + throw new ConfigurationException( + $"CertificateThumbprint or CertificatePath required for connection '{connection.Name}'"); + } + } + + private static void ValidateOAuth(DataverseConnectionOptions connection) + { + if (string.IsNullOrEmpty(connection.ClientId)) + { + throw new ConfigurationException($"ClientId required for connection '{connection.Name}'"); + } + + if (string.IsNullOrEmpty(connection.RedirectUri)) + { + throw new ConfigurationException($"RedirectUri required for connection '{connection.Name}'"); + } + } +} +``` + +--- + +## File Changes + +| File | Change | +|------|--------| +| `src/PPDS.Dataverse/DependencyInjection/DataverseConnectionOptions.cs` | New structured options | +| `src/PPDS.Dataverse/DependencyInjection/DataverseAuthType.cs` | New auth type enum | +| `src/PPDS.Dataverse/Configuration/ConnectionStringBuilder.cs` | New builder | +| `src/PPDS.Dataverse/Configuration/SecretResolver.cs` | New secret resolution | +| `src/PPDS.Dataverse/Configuration/ConfigurationValidator.cs` | New validation | +| `src/PPDS.Dataverse/Configuration/ConfigurationException.cs` | New exception type | +| `src/PPDS.Dataverse/Pooling/DataverseConnectionPool.cs` | Use builder instead of raw string | +| `tests/PPDS.Dataverse.Tests/Configuration/` | New test files | + +--- + +## Dependencies + +```xml + + + +``` + +Note: These are optional - Key Vault resolution only attempted if `ClientSecretKeyVaultUri` is used. 
+ +--- + +## Migration Guide + +### From Raw Connection String + +**Before:** +```json +{ + "Dataverse": { + "Connections": [ + { + "Name": "Primary", + "ConnectionString": "AuthType=ClientSecret;Url=https://org.crm.dynamics.com;ClientId=xxx;ClientSecret=yyy;TenantId=zzz" + } + ] + } +} +``` + +**After:** +```json +{ + "Dataverse": { + "Url": "https://org.crm.dynamics.com", + "TenantId": "zzz", + "Connections": [ + { + "Name": "Primary", + "ClientId": "xxx", + "ClientSecretVariable": "DATAVERSE_SECRET" + } + ] + } +} +``` + +Then set environment variable: `DATAVERSE_SECRET=yyy` + +### Backwards Compatibility + +Raw `ConnectionString` property is still supported as escape hatch. Existing configs continue to work unchanged. + +--- + +## References + +- [Azure.Identity Documentation](https://learn.microsoft.com/en-us/dotnet/api/azure.identity) +- [Key Vault Secrets Client](https://learn.microsoft.com/en-us/dotnet/api/azure.security.keyvault.secrets) +- [Dataverse Connection Strings](https://learn.microsoft.com/en-us/power-apps/developer/data-platform/xrm-tooling/use-connection-strings-xrm-tooling-connect) diff --git a/docs/specs/CONNECTION_HEALTH_MANAGEMENT.md b/docs/specs/CONNECTION_HEALTH_MANAGEMENT.md new file mode 100644 index 00000000..c3ee2238 --- /dev/null +++ b/docs/specs/CONNECTION_HEALTH_MANAGEMENT.md @@ -0,0 +1,766 @@ +# Specification: Connection Health Management and Failure Recovery + +**Status:** Draft +**Author:** Claude Code +**Date:** 2025-12-22 +**Priority:** High + +--- + +## Problem Statement + +The PPDS.Dataverse SDK needs to support "always-on" integration scenarios where connection pools run indefinitely. The current implementation has gaps: + +1. **Fixed `MaxLifetime = 30min`** forces connection recycling even when connections are healthy, causing unnecessary churn. + +2. **No failure detection during operations** - if a connection fails mid-operation (auth failure, network issue), the error bubbles up without recovery. + +3. 
**No connection validation on checkout** - invalid connections can be returned from the pool. + +4. **No graceful recovery** - when a connection fails, the operation fails. For enterprise migrations, we need retry-with-new-connection. + +For data migrations handling millions of records, connection failures must not cause data loss. The system should detect failures, recover gracefully, and continue processing. + +--- + +## Microsoft's Guidance + +### Token Lifecycle + +From [OAuth authentication](https://learn.microsoft.com/en-us/power-apps/developer/data-platform/authenticate-oauth): + +- Access tokens expire in **~60-75 minutes** +- MSAL caches tokens and refreshes them automatically +- ServiceClient checks token expiry and refreshes **1 minute before expiration** + +### ServiceClient Token Refresh + +From [ServiceClient source](https://github.com/microsoft/PowerPlatform-DataverseServiceClient): + +```csharp +// Internal logic (paraphrased) +if (token.ExpiresOn < DateTime.UtcNow.AddMinutes(1)) +{ + RefreshToken(); +} +``` + +This means ServiceClient handles token refresh internally for the happy path. The risk is when refresh **fails**. + +### Authentication Patterns for Long-Running Apps + +**Pattern 1: Connection String (Current)** +```csharp +var client = new ServiceClient(connectionString); +// ServiceClient manages tokens internally via MSAL +``` + +**Pattern 2: External Token Provider** +```csharp +var client = new ServiceClient( + new ConnectionOptions + { + ServiceUri = new Uri("https://org.crm.dynamics.com"), + AuthenticationType = AuthenticationType.ExternalTokenManagement, + AccessTokenProviderFunctionAsync = async (uri) => await GetTokenAsync(uri) + }); +``` + +**Note:** Pattern 2 has known issues - [GitHub #377](https://github.com/microsoft/PowerPlatform-DataverseServiceClient/issues/377) reports excessive token acquisition calls. 
+ +### References + +- [OAuth authentication](https://learn.microsoft.com/en-us/power-apps/developer/data-platform/authenticate-oauth) +- [ServiceClient Class](https://learn.microsoft.com/en-us/dotnet/api/microsoft.powerplatform.dataverse.client.serviceclient) +- [ConnectionOptions](https://learn.microsoft.com/en-us/dotnet/api/microsoft.powerplatform.dataverse.client.model.connectionoptions) +- [ServiceClient Source](https://github.com/microsoft/PowerPlatform-DataverseServiceClient) + +--- + +## Current State + +### Connection Pool Configuration + +**File:** `Pooling/ConnectionPoolOptions.cs` + +| Setting | Current Default | Purpose | +|---------|-----------------|---------| +| `MaxPoolSize` | 50 | Maximum connections | +| `MinPoolSize` | 5 | Minimum idle connections | +| `MaxIdleTime` | 5 minutes | Evict idle connections | +| `MaxLifetime` | 30 minutes | Force recycle all connections | +| `AcquireTimeout` | 30 seconds | Timeout waiting for connection | + +### Current Lifecycle + +``` +Connection Created + │ + ▼ + [In Pool] + │ + ├──── Age > MaxLifetime (30min) ──────► Disposed + │ + ├──── Idle > MaxIdleTime (5min) ──────► Disposed + │ + └──── Checked Out ──────► Used ──────► Returned to Pool +``` + +### What's Missing + +1. **Validation on checkout** - Pool returns connections without checking `IsReady` +2. **Failure recovery during operations** - Exceptions bubble up without retry +3. **Auth failure handling** - No special handling for authentication errors +4. **Health monitoring** - No proactive health checks on idle connections +5. **Connection invalidation** - No way to mark a connection as bad + +--- + +## Proposed Solution + +### Architecture + +``` +┌─────────────────────────────────────────────────────────────────────┐ +│ Connection Pool │ +├─────────────────────────────────────────────────────────────────────┤ +│ │ +│ GetClientAsync() │ +│ ┌─────────────────────────────────────────────────────────────┐ │ +│ │ 1. 
Get connection from internal pool │ │ +│ │ 2. Validate connection: │ │ +│ │ - IsReady == true? │ │ +│ │ - Age < MaxLifetime? │ │ +│ │ - Not marked invalid? │ │ +│ │ 3. If invalid: │ │ +│ │ - Dispose connection │ │ +│ │ - Get/create another │ │ +│ │ - Repeat validation │ │ +│ │ 4. Return healthy connection │ │ +│ └─────────────────────────────────────────────────────────────┘ │ +│ │ +│ ReturnToPool(connection) │ +│ ┌─────────────────────────────────────────────────────────────┐ │ +│ │ 1. Check if marked invalid │ │ +│ │ 2. If invalid → Dispose, don't return │ │ +│ │ 3. If valid → Return to pool │ │ +│ └─────────────────────────────────────────────────────────────┘ │ +│ │ +│ MarkInvalid(connection) │ +│ ┌─────────────────────────────────────────────────────────────┐ │ +│ │ 1. Mark connection as invalid │ │ +│ │ 2. Log the reason │ │ +│ │ 3. Increment failure counter │ │ +│ │ 4. On next return/checkout → Will be disposed │ │ +│ └─────────────────────────────────────────────────────────────┘ │ +│ │ +└─────────────────────────────────────────────────────────────────────┘ +``` + +### Failure Recovery Flow + +``` +┌─────────────────────────────────────────────────────────────────────┐ +│ BulkOperationExecutor │ +├─────────────────────────────────────────────────────────────────────┤ +│ ExecuteWithFailureRecoveryAsync() │ +│ ┌─────────────────────────────────────────────────────────────┐ │ +│ │ 1. Get connection from pool │ │ +│ │ 2. Try execute operation │ │ +│ │ 3. On success → return result │ │ +│ │ 4. On auth failure: │ │ +│ │ a. Mark connection as invalid │ │ +│ │ b. Dispose connection (don't return to pool) │ │ +│ │ c. Get new connection │ │ +│ │ d. Retry operation │ │ +│ │ 5. On connection failure: │ │ +│ │ a. Same as auth failure │ │ +│ │ 6. On other failure: │ │ +│ │ a. Return connection to pool normally │ │ +│ │ b. 
Throw exception to caller │ │ +│ └─────────────────────────────────────────────────────────────┘ │ +└─────────────────────────────────────────────────────────────────────┘ +``` + +--- + +## Implementation Details + +### 1. Add Connection Validation to Pool + +**File:** `Pooling/DataverseConnectionPool.cs` + +```csharp +/// +/// Validates that a connection is healthy and usable. +/// +/// The client to validate. +/// True if the connection is healthy. +private bool IsConnectionHealthy(PooledDataverseClient client) +{ + // Check if ServiceClient reports ready + if (!client.IsReady) + { + _logger.LogDebug( + "Connection not ready. ConnectionId: {ConnectionId}", + client.ConnectionId); + return false; + } + + // Check age against MaxLifetime + var age = DateTime.UtcNow - client.CreatedAt; + if (age > _options.Pool.MaxLifetime) + { + _logger.LogDebug( + "Connection exceeded max lifetime. ConnectionId: {ConnectionId}, Age: {Age}", + client.ConnectionId, age); + return false; + } + + // Check if marked as invalid + if (client.IsInvalid) + { + _logger.LogDebug( + "Connection marked invalid. ConnectionId: {ConnectionId}", + client.ConnectionId); + return false; + } + + return true; +} + +public async ValueTask GetClientAsync( + DataverseClientOptions? options = null, + CancellationToken cancellationToken = default) +{ + var maxAttempts = 3; // Prevent infinite loops + var attempts = 0; + + while (attempts < maxAttempts) + { + attempts++; + var client = await GetClientFromPoolAsync(options, cancellationToken); + + if (IsConnectionHealthy(client)) + { + return WrapClient(client, options); + } + + // Unhealthy - dispose and try again + _logger.LogInformation( + "Connection failed health check. Disposing and getting another. " + + "ConnectionId: {ConnectionId}, Attempt: {Attempt}", + client.ConnectionId, attempts); + + await DisposeConnectionAsync(client); + } + + throw new PoolExhaustedException( + "Failed to get healthy connection after multiple attempts"); +} +``` + +### 2. 
Add Connection Invalidation Support + +**File:** `Pooling/PooledDataverseClient.cs` + +```csharp +public class PooledDataverseClient : IDataverseClient +{ + // ... existing members ... + + /// + /// Gets or sets whether this connection has been marked as invalid. + /// Invalid connections will be disposed instead of returned to the pool. + /// + public bool IsInvalid { get; private set; } + + /// + /// Gets the reason the connection was marked invalid, if any. + /// + public string? InvalidReason { get; private set; } + + /// + /// Marks this connection as invalid. It will be disposed on return to pool. + /// + /// The reason for invalidation. + public void MarkInvalid(string reason) + { + IsInvalid = true; + InvalidReason = reason; + } +} +``` + +### 3. Add IDataverseClient.MarkInvalid() + +**File:** `Client/IDataverseClient.cs` + +```csharp +public interface IDataverseClient : IAsyncDisposable, IDisposable +{ + // ... existing members ... + + /// + /// Gets whether this connection has been marked as invalid. + /// + bool IsInvalid { get; } + + /// + /// Marks this connection as invalid. It will not be returned to the pool. + /// Call this when an unrecoverable error occurs (auth failure, etc.). + /// + /// The reason for invalidation (for logging). + void MarkInvalid(string reason); +} +``` + +### 4. Update Return-to-Pool Logic + +**File:** `Pooling/DataverseConnectionPool.cs` + +```csharp +internal async Task ReturnConnectionAsync(PooledDataverseClient client) +{ + if (client.IsInvalid) + { + _logger.LogInformation( + "Connection marked invalid, disposing instead of returning. " + + "ConnectionId: {ConnectionId}, Reason: {Reason}", + client.ConnectionId, client.InvalidReason); + + await DisposeConnectionAsync(client); + Interlocked.Increment(ref _invalidConnectionCount); + return; + } + + // ... existing return-to-pool logic ... +} +``` + +### 5. 
Add Auth Failure Detection + +**File:** `BulkOperations/BulkOperationExecutor.cs` + +```csharp +/// +/// Checks if an exception indicates an authentication/authorization failure. +/// +private static bool IsAuthFailure(Exception exception) +{ + // Check for common auth failure patterns + if (exception is FaultException faultEx) + { + var fault = faultEx.Detail; + + // Common auth error codes + // -2147180286: Caller does not have privilege + // -2147204720: User is disabled + // -2147180285: AccessDenied + var authErrorCodes = new[] + { + -2147180286, // No privilege + -2147204720, // User disabled + -2147180285, // Access denied + }; + + if (authErrorCodes.Contains(fault.ErrorCode)) + { + return true; + } + + // Check message for auth-related keywords + var message = fault.Message?.ToLowerInvariant() ?? ""; + if (message.Contains("authentication") || + message.Contains("authorization") || + message.Contains("token") || + message.Contains("expired") || + message.Contains("credential")) + { + return true; + } + } + + // Check for HTTP 401/403 in inner exceptions + if (exception.InnerException is HttpRequestException httpEx) + { + var message = httpEx.Message?.ToLowerInvariant() ?? ""; + if (message.Contains("401") || message.Contains("403") || + message.Contains("unauthorized") || message.Contains("forbidden")) + { + return true; + } + } + + return false; +} + +/// +/// Checks if an exception indicates a connection failure. +/// +private static bool IsConnectionFailure(Exception exception) +{ + return exception is HttpRequestException || + exception is TaskCanceledException || + exception is OperationCanceledException || + exception is SocketException || + exception.InnerException is SocketException; +} +``` + +### 6. Add Failure Recovery Wrapper + +**File:** `BulkOperations/BulkOperationExecutor.cs` + +```csharp +/// +/// Executes an operation with automatic failure recovery. 
+/// On auth or connection failures, marks the connection invalid and retries with a new one. +/// +private async Task ExecuteWithFailureRecoveryAsync( + Func> operation, + int maxRetries, + CancellationToken cancellationToken) +{ + var attempts = 0; + Exception? lastException = null; + + while (attempts < maxRetries) + { + attempts++; + IDataverseClient? client = null; + + try + { + client = await _connectionPool.GetClientAsync(cancellationToken: cancellationToken); + return await operation(client, cancellationToken); + } + catch (Exception ex) when (IsAuthFailure(ex)) + { + lastException = ex; + + _logger.LogWarning( + "Authentication failure on connection {Connection}. " + + "Marking invalid and retrying. Attempt: {Attempt}/{MaxRetries}. Error: {Error}", + client?.ConnectionName, attempts, maxRetries, ex.Message); + + // Mark connection as invalid - it won't be returned to pool + client?.MarkInvalid($"Auth failure: {ex.Message}"); + + // Don't wait - immediately try with new connection + continue; + } + catch (Exception ex) when (IsConnectionFailure(ex)) + { + lastException = ex; + + _logger.LogWarning( + "Connection failure on {Connection}. " + + "Marking invalid and retrying. Attempt: {Attempt}/{MaxRetries}. Error: {Error}", + client?.ConnectionName, attempts, maxRetries, ex.Message); + + client?.MarkInvalid($"Connection failure: {ex.Message}"); + continue; + } + finally + { + // Dispose will check IsInvalid and handle appropriately + if (client is IAsyncDisposable asyncDisposable) + { + await asyncDisposable.DisposeAsync(); + } + } + } + + throw new DataverseConnectionException( + "Operation failed after multiple attempts", + lastException); +} +``` + +### 7. Add Health Check Configuration + +**File:** `Pooling/ConnectionPoolOptions.cs` + +```csharp +public class ConnectionPoolOptions +{ + // ... existing properties ... + + /// + /// Maximum lifetime for a connection before it's recycled. + /// Set higher for stable long-running scenarios. 
+ /// Default: 60 minutes (within OAuth token validity window) + /// + public TimeSpan MaxLifetime { get; set; } = TimeSpan.FromMinutes(60); + + /// + /// Whether to validate connection health on checkout. + /// When true, connections are checked for IsReady, age, and validity before being returned. + /// Default: true + /// + public bool ValidateOnCheckout { get; set; } = true; + + /// + /// Maximum number of retry attempts for auth/connection failures. + /// Default: 2 + /// + public int MaxConnectionRetries { get; set; } = 2; + + /// + /// Whether to enable proactive health monitoring of idle connections. + /// When true, a background task periodically checks idle connections. + /// Default: true + /// + public bool EnableHealthMonitoring { get; set; } = true; + + /// + /// Interval for proactive health monitoring of idle connections. + /// Default: 5 minutes + /// + public TimeSpan HealthCheckInterval { get; set; } = TimeSpan.FromMinutes(5); +} +``` + +### 8. Add Background Health Monitor (Optional Enhancement) + +**File:** `Pooling/ConnectionHealthMonitor.cs` + +```csharp +/// +/// Background service that monitors connection health and removes unhealthy connections. +/// +public sealed class ConnectionHealthMonitor : BackgroundService +{ + private readonly IDataverseConnectionPool _pool; + private readonly ConnectionPoolOptions _options; + private readonly ILogger _logger; + + public ConnectionHealthMonitor( + IDataverseConnectionPool pool, + IOptions options, + ILogger logger) + { + _pool = pool; + _options = options.Value; + _logger = logger; + } + + protected override async Task ExecuteAsync(CancellationToken stoppingToken) + { + if (!_options.EnableHealthMonitoring) + { + _logger.LogInformation("Connection health monitoring is disabled"); + return; + } + + _logger.LogInformation( + "Connection health monitoring started. 
Interval: {Interval}", + _options.HealthCheckInterval); + + while (!stoppingToken.IsCancellationRequested) + { + try + { + await Task.Delay(_options.HealthCheckInterval, stoppingToken); + + // Trigger health check on pool + // Pool will remove unhealthy idle connections + var stats = _pool.Statistics; + + _logger.LogDebug( + "Health check complete. Active: {Active}, Idle: {Idle}, Invalid: {Invalid}", + stats.ActiveConnections, + stats.IdleConnections, + stats.InvalidConnections); + } + catch (OperationCanceledException) when (stoppingToken.IsCancellationRequested) + { + // Normal shutdown + break; + } + catch (Exception ex) + { + _logger.LogError(ex, "Error during connection health check"); + } + } + + _logger.LogInformation("Connection health monitoring stopped"); + } +} +``` + +### 9. Update PoolStatistics + +**File:** `Pooling/PoolStatistics.cs` + +```csharp +public class PoolStatistics +{ + // ... existing properties ... + + /// + /// Number of connections that were invalidated due to failures. + /// + public long InvalidConnections { get; init; } + + /// + /// Number of auth failures detected. + /// + public long AuthFailures { get; init; } + + /// + /// Number of connection failures detected. + /// + public long ConnectionFailures { get; init; } + + /// + /// Number of successful connection health checks. + /// + public long HealthChecksSuccess { get; init; } + + /// + /// Number of failed connection health checks. 
+ /// + public long HealthChecksFailed { get; init; } +} +``` + +--- + +## Configuration Recommendations + +### For Data Migrations (Batch Jobs) + +```csharp +services.AddDataverseConnectionPool(options => +{ + options.Pool.MaxPoolSize = 50; + options.Pool.MinPoolSize = 5; + options.Pool.MaxLifetime = TimeSpan.FromMinutes(60); + options.Pool.MaxIdleTime = TimeSpan.FromMinutes(10); + options.Pool.ValidateOnCheckout = true; + options.Pool.MaxConnectionRetries = 2; + options.Pool.EnableHealthMonitoring = false; // Not needed for batch +}); +``` + +### For Always-On Integrations + +```csharp +services.AddDataverseConnectionPool(options => +{ + options.Pool.MaxPoolSize = 20; + options.Pool.MinPoolSize = 5; + options.Pool.MaxLifetime = TimeSpan.FromMinutes(120); // Longer-lived + options.Pool.MaxIdleTime = TimeSpan.FromMinutes(30); // Keep connections warm + options.Pool.ValidateOnCheckout = true; + options.Pool.MaxConnectionRetries = 3; + options.Pool.EnableHealthMonitoring = true; + options.Pool.HealthCheckInterval = TimeSpan.FromMinutes(5); +}); +``` + +--- + +## Error Handling Matrix + +| Error Type | Detection | Action | Retry? | +|------------|-----------|--------|--------| +| Service Protection (429) | `IsServiceProtectionError()` | Record throttle, route away | Yes - different connection | +| Auth Failure | `IsAuthFailure()` | Mark invalid, dispose | Yes - new connection | +| Connection Failure | `IsConnectionFailure()` | Mark invalid, dispose | Yes - new connection | +| Business Logic Error | Fault without above patterns | Return to pool normally | No - throw to caller | +| Data Validation Error | Fault with specific codes | Return to pool normally | No - throw to caller | + +--- + +## Testing Requirements + +### Unit Tests + +1. 
**Connection Validation** + - Verify `IsConnectionHealthy` returns false for `IsReady == false` + - Verify `IsConnectionHealthy` returns false for aged connections + - Verify `IsConnectionHealthy` returns false for invalid connections + +2. **Connection Invalidation** + - Verify `MarkInvalid` sets `IsInvalid = true` + - Verify invalid connections are disposed, not returned to pool + - Verify `InvalidReason` is captured + +3. **Auth Failure Detection** + - Verify `IsAuthFailure` correctly identifies auth error codes + - Verify `IsAuthFailure` detects auth-related messages + +4. **Failure Recovery** + - Verify auth failures trigger connection invalidation + - Verify retry uses new connection + - Verify max retries is respected + +### Integration Tests + +1. **Simulated Auth Failure** + - Create mock that fails auth on first attempt + - Verify connection is invalidated + - Verify second attempt uses new connection + - Verify operation succeeds on retry + +2. **Connection Validation** + - Create connection that reports `IsReady = false` + - Verify pool gets different connection + - Verify unhealthy connection is disposed + +3. **Pool Statistics** + - Trigger various failure types + - Verify statistics accurately reflect failures + +### Long-Running Tests + +1. **Overnight Stability** + - Run pool for 8+ hours + - Verify connections are recycled properly + - Verify no memory leaks + - Verify token refresh works correctly + +--- + +## Acceptance Criteria + +1. [ ] Connections are validated on checkout (IsReady, age, validity) +2. [ ] Invalid connections are disposed, not returned to pool +3. [ ] `MarkInvalid()` method exists on `IDataverseClient` +4. [ ] Auth failures are detected and trigger connection invalidation +5. [ ] Connection failures are detected and trigger connection invalidation +6. [ ] Failed operations are retried with new connections +7. [ ] Max retries prevents infinite loops +8. [ ] `MaxLifetime` default increased to 60 minutes +9. 
[ ] `ValidateOnCheckout` is configurable (default true) +10. [ ] `MaxConnectionRetries` is configurable (default 2) +11. [ ] Statistics include failure counts +12. [ ] Logging includes connection ID and failure reason +13. [ ] All unit tests pass +14. [ ] Integration tests verify recovery behavior +15. [ ] Long-running test confirms stability + +--- + +## Risks and Mitigations + +| Risk | Mitigation | +|------|------------| +| False positive auth detection | Conservative error code list, logged for debugging | +| Connection validation overhead | Only validate on checkout, not continuously | +| Pool exhaustion during recovery | `MaxConnectionRetries` limits retry attempts | +| Memory leaks from invalid connections | Invalid connections are disposed immediately | +| Health monitor resource usage | Configurable interval, can be disabled | + +--- + +## Future Enhancements + +1. **Proactive Token Refresh** - Monitor token expiry and refresh before operations +2. **Circuit Breaker per Connection** - Temporarily exclude connections with repeated failures +3. **Connection Warmup** - Pre-create connections to `MinPoolSize` on startup +4. **Metrics Export** - Export pool health metrics to monitoring systems (Prometheus, App Insights) +5. **Graceful Shutdown** - Wait for in-flight operations before disposing connections diff --git a/docs/specs/MINVER_VERSIONING.md b/docs/specs/MINVER_VERSIONING.md new file mode 100644 index 00000000..bc99a00f --- /dev/null +++ b/docs/specs/MINVER_VERSIONING.md @@ -0,0 +1,799 @@ +# MinVer Versioning Implementation Spec + +## Overview + +This document specifies the implementation of automated versioning using [MinVer](https://github.com/adamralph/minver) for the PPDS SDK monorepo, enabling independent per-package releases with git tag-driven version management. 
+ +--- + +## Current State + +| Package | Current Version | Location | +|---------|-----------------|----------| +| PPDS.Plugins | 1.1.0 | `src/PPDS.Plugins/PPDS.Plugins.csproj` | +| PPDS.Dataverse | 1.0.0-alpha1 | `src/PPDS.Dataverse/PPDS.Dataverse.csproj` | +| PPDS.Migration | 1.0.0-alpha1 | `src/PPDS.Migration/PPDS.Migration.csproj` | +| PPDS.Migration.Cli | 1.0.0-alpha1 | `src/PPDS.Migration.Cli/PPDS.Migration.Cli.csproj` | + +**Problems with current approach:** +1. Versions hardcoded in `.csproj` - easy to forget updates +2. Single release publishes ALL packages regardless of changes +3. No automated pre-release versioning +4. Tag version and package version can diverge + +--- + +## Package Dependency Tree + +``` +PPDS.Plugins (independent) + +PPDS.Migration.Cli +├── PPDS.Migration +│ └── PPDS.Dataverse +└── PPDS.Dataverse +``` + +**Key Insight:** PPDS.Migration and PPDS.Migration.Cli are tightly coupled and should share versions. They will be released together as a unit. + +--- + +## Proposed Versioning Strategy + +### Package Groups + +| Group | Packages | Tag Prefix | Rationale | +|-------|----------|------------|-----------| +| **Plugins** | PPDS.Plugins | `Plugins-v` | Stable, independent, rarely changes | +| **Dataverse** | PPDS.Dataverse | `Dataverse-v` | Core library, independent release cycle | +| **Migration** | PPDS.Migration, PPDS.Migration.Cli | `Migration-v` | Tightly coupled, release together | + +### Tag Format + +``` +{PackageGroup}-v{Major}.{Minor}.{Patch}[-{PreRelease}] +``` + +**Examples:** +``` +Plugins-v1.1.0 → PPDS.Plugins 1.1.0 +Dataverse-v1.2.0 → PPDS.Dataverse 1.2.0 +Dataverse-v1.2.0-alpha → PPDS.Dataverse 1.2.0-alpha +Migration-v1.0.0 → PPDS.Migration 1.0.0 + PPDS.Migration.Cli 1.0.0 +Migration-v1.0.0-beta.1 → PPDS.Migration 1.0.0-beta.1 + PPDS.Migration.Cli 1.0.0-beta.1 +``` + +### Pre-Release Versioning + +MinVer automatically generates pre-release versions based on commits since the last tag: + +| Scenario | Result | 
+|----------|--------| +| Tagged `Dataverse-v1.2.0` | 1.2.0 | +| 3 commits after tag | 1.2.1-alpha.0.3 (auto-generated) | +| Tagged `Dataverse-v1.3.0-beta.1` | 1.3.0-beta.1 | + +**Explicit pre-release tags** should follow this convention: +- Alpha: `Dataverse-v1.2.0-alpha.1` +- Beta: `Dataverse-v1.2.0-beta.1` +- Release Candidate: `Dataverse-v1.2.0-rc.1` + +--- + +## Implementation Changes + +### 1. Add MinVer Package References + +**Create `Directory.Build.props` in repo root:** + +```xml + + + + latest + enable + enable + + +``` + +**Add MinVer to each publishable project:** + +```xml + + + + + + Plugins-v + alpha.0 + +``` + +```xml + + + + + + Dataverse-v + alpha.0 + +``` + +```xml + + + + + + Migration-v + alpha.0 + +``` + +### 2. Remove Hardcoded Versions + +**Remove from each `.csproj`:** +```xml + +1.0.0-alpha1 +``` + +MinVer will set `Version`, `PackageVersion`, `AssemblyVersion`, and `FileVersion` automatically. + +### 3. Update CI/CD Workflow + +**Replace `publish-nuget.yml` with per-package release support:** + +```yaml +name: Publish to NuGet + +on: + push: + tags: + - 'Plugins-v*' + - 'Dataverse-v*' + - 'Migration-v*' + +jobs: + publish: + runs-on: windows-latest + + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 # Required for MinVer to read git history + + - name: Setup .NET + uses: actions/setup-dotnet@v4 + with: + dotnet-version: | + 8.0.x + 10.0.x + + - name: Determine package to publish + id: package + shell: bash + run: | + TAG=${GITHUB_REF#refs/tags/} + echo "tag=$TAG" >> $GITHUB_OUTPUT + + if [[ $TAG == Plugins-v* ]]; then + echo "package=PPDS.Plugins" >> $GITHUB_OUTPUT + echo "projects=src/PPDS.Plugins/PPDS.Plugins.csproj" >> $GITHUB_OUTPUT + elif [[ $TAG == Dataverse-v* ]]; then + echo "package=PPDS.Dataverse" >> $GITHUB_OUTPUT + echo "projects=src/PPDS.Dataverse/PPDS.Dataverse.csproj" >> $GITHUB_OUTPUT + elif [[ $TAG == Migration-v* ]]; then + echo "package=PPDS.Migration" >> $GITHUB_OUTPUT + echo 
"projects=src/PPDS.Migration/PPDS.Migration.csproj src/PPDS.Migration.Cli/PPDS.Migration.Cli.csproj" >> $GITHUB_OUTPUT + else + echo "Unknown tag format: $TAG" + exit 1 + fi + + - name: Show version info + shell: bash + run: | + echo "Tag: ${{ steps.package.outputs.tag }}" + echo "Package: ${{ steps.package.outputs.package }}" + echo "Projects: ${{ steps.package.outputs.projects }}" + + - name: Restore dependencies + run: dotnet restore + + - name: Build + run: dotnet build --configuration Release --no-restore + + - name: Pack specific projects + shell: bash + run: | + mkdir -p ./nupkgs + for project in ${{ steps.package.outputs.projects }}; do + echo "Packing $project" + dotnet pack "$project" --configuration Release --no-build --output ./nupkgs + done + + - name: List packages + shell: bash + run: ls -la ./nupkgs/ + + - name: Push packages to NuGet + shell: bash + env: + NUGET_API_KEY: ${{ secrets.NUGET_API_KEY }} + run: | + for package in ./nupkgs/*.nupkg; do + echo "Pushing $package" + dotnet nuget push "$package" --api-key "$NUGET_API_KEY" --source https://api.nuget.org/v3/index.json --skip-duplicate + done + + - name: Push symbols to NuGet + shell: bash + env: + NUGET_API_KEY: ${{ secrets.NUGET_API_KEY }} + run: | + for package in ./nupkgs/*.snupkg; do + echo "Pushing $package" + dotnet nuget push "$package" --api-key "$NUGET_API_KEY" --source https://api.nuget.org/v3/index.json --skip-duplicate + done + continue-on-error: true +``` + +--- + +## Release Process + +### Creating a Release + +**Step 1: Create and push the tag** +```bash +# For PPDS.Plugins +git tag Plugins-v1.2.0 +git push origin Plugins-v1.2.0 + +# For PPDS.Dataverse +git tag Dataverse-v1.3.0 +git push origin Dataverse-v1.3.0 + +# For PPDS.Migration (includes CLI) +git tag Migration-v1.1.0 +git push origin Migration-v1.1.0 + +# For pre-release +git tag Dataverse-v1.3.0-beta.1 +git push origin Dataverse-v1.3.0-beta.1 +``` + +**Step 2: CI/CD automatically:** +1. Detects the tag pattern +2. 
Builds the specific package(s) +3. MinVer reads the tag and sets the version +4. Packs only the relevant package(s) +5. Pushes to NuGet + +**Step 3: Create GitHub Release (optional but recommended)** +- Go to GitHub → Releases → "Draft a new release" +- Select the tag you just pushed +- Add release notes (see Release Notes section below) +- Publish + +### Pre-Release Workflow + +```bash +# Start alpha phase +git tag Migration-v1.1.0-alpha.1 +git push origin Migration-v1.1.0-alpha.1 + +# Iterate on alpha +git tag Migration-v1.1.0-alpha.2 +git push origin Migration-v1.1.0-alpha.2 + +# Move to beta +git tag Migration-v1.1.0-beta.1 +git push origin Migration-v1.1.0-beta.1 + +# Release candidate +git tag Migration-v1.1.0-rc.1 +git push origin Migration-v1.1.0-rc.1 + +# Final release +git tag Migration-v1.1.0 +git push origin Migration-v1.1.0 +``` + +--- + +## Release Notes Strategy + +### Single CHANGELOG.md (Recommended) + +Keep the current `CHANGELOG.md` but organize by package: + +```markdown +# Changelog + +## [Unreleased] + +### PPDS.Dataverse +- Added feature X + +### PPDS.Migration +- Fixed bug Y + +## PPDS.Plugins v1.2.0 - 2025-01-15 + +### Added +- New feature A + +## PPDS.Dataverse v1.3.0 - 2025-01-10 + +### Added +- New feature B + +### Fixed +- Bug fix C + +## PPDS.Migration v1.1.0 - 2025-01-10 + +### Added +- CLI command Z +``` + +### GitHub Release Notes + +When creating a GitHub Release, copy the relevant section from CHANGELOG.md: + +**Example: `Dataverse-v1.3.0` Release** +```markdown +## What's New + +### Added +- Bulk operations with parallel batch processing +- Connection pooling with throttle-aware routing +- TVP race condition retry (SQL 3732) +- SQL deadlock retry (SQL 1205) + +### Fixed +- Connection pool leak on disposal + +## Installation + +```bash +dotnet add package PPDS.Dataverse --version 1.3.0 +``` + +## Full Changelog +See [CHANGELOG.md](https://github.com/joshsmithxrm/ppds-sdk/blob/main/CHANGELOG.md) +``` + +--- + +## Version 
Compatibility + +### Major Version Sync Rule + +Per existing CLAUDE.md guidance, **major versions stay in sync across ecosystem**: + +| Package | Valid Versions | Notes | +|---------|----------------|-------| +| PPDS.Plugins | 1.x.x | Stable | +| PPDS.Dataverse | 1.x.x | Must match major with Migration | +| PPDS.Migration | 1.x.x | Must match major with Dataverse | + +**When to bump major version:** +- Breaking API changes in any package +- Coordinate major bumps across all packages + +### Dependency Version Constraints + +When PPDS.Migration references PPDS.Dataverse, use **minimum version**: + +```xml +<PackageReference Include="PPDS.Dataverse" Version="1.0.0" /> + +``` + +This means "1.0.0 or any higher version". Note that NuGet minimum-version constraints do not cap at the major version, so keep majors aligned via the Major Version Sync Rule above. + +--- + +## Migration Plan + +### Phase 1: Prepare (This PR) +1. Add MinVer to all publishable projects +2. Remove hardcoded `<Version>` elements +3. Update CI/CD workflow +4. Test locally with `dotnet pack` to verify versions + +### Phase 2: Initial Tags +After merging, create initial tags to establish version baseline: +```bash +git tag Plugins-v1.1.0 # Match current version +git tag Dataverse-v1.0.0 # Start fresh for stable release +git tag Migration-v1.0.0 # Start fresh for stable release +``` + +### Phase 3: Verify +1. Push one tag +2. Verify CI/CD triggers correctly +3. Verify NuGet package has correct version +4. Repeat for other packages + +--- + +## Local Development + +### Checking Current Version + +```bash +# MinVer shows version during build +dotnet build src/PPDS.Dataverse/PPDS.Dataverse.csproj -v minimal + +# Or explicitly +dotnet msbuild src/PPDS.Dataverse/PPDS.Dataverse.csproj -t:MinVer +``` + +### Simulating a Release Locally + +```bash +# Create a local tag (don't push) +git tag Dataverse-v1.5.0 + +# Build and pack +dotnet pack src/PPDS.Dataverse/PPDS.Dataverse.csproj -o ./nupkgs + +# Check the version +ls ./nupkgs/ +# Should show: PPDS.Dataverse.1.5.0.nupkg + +# Delete the local tag +git tag -d Dataverse-v1.5.0 +``` + +--- + +## FAQ + +### Q: What if I forget to tag before releasing? 
+MinVer uses the last tag it can find. If no tag exists for that package prefix, it defaults to `0.0.0-alpha.0.{height}` where height is commit count. Always tag before releasing. + +### Q: Can I release multiple packages at once? +Yes, push multiple tags: +```bash +git tag Dataverse-v1.3.0 +git tag Migration-v1.1.0 +git push origin Dataverse-v1.3.0 Migration-v1.1.0 +``` +Each tag triggers a separate workflow run. + +### Q: What happens to existing v1.1.0 tag? +The old `v1.1.0` tag format won't match any `MinVerTagPrefix`, so it will be ignored. Leave it for historical reference or delete it. + +### Q: How do I see what version will be built? +```bash +dotnet build -v minimal 2>&1 | grep "MinVer" +``` + +### Q: Can I override the version manually? +Yes, for emergencies: +```bash +dotnet pack -p:Version=1.2.3-hotfix +``` +But prefer tagging for traceability. + +--- + +## Changelog Strategy + +### Per-Package CHANGELOG Files + +Each publishable package group gets its own CHANGELOG: + +``` +src/ +├── PPDS.Plugins/ +│ └── CHANGELOG.md +├── PPDS.Dataverse/ +│ └── CHANGELOG.md +└── PPDS.Migration/ + └── CHANGELOG.md # Covers library + CLI (same release) +``` + +### CHANGELOG Format + +Each per-package CHANGELOG follows Keep a Changelog format: + +```markdown +# Changelog - PPDS.Dataverse + +All notable changes to PPDS.Dataverse will be documented in this file. + +## [Unreleased] + +## [1.0.0] - 2025-01-XX + +### Added +- Bulk operations with parallel batch processing +- Connection pooling with throttle-aware routing +- TVP race condition retry (SQL 3732) +- SQL deadlock retry (SQL 1205) + +### Fixed +- Connection pool leak on disposal + +[Unreleased]: https://github.com/joshsmithxrm/ppds-sdk/compare/Dataverse-v1.0.0...HEAD +[1.0.0]: https://github.com/joshsmithxrm/ppds-sdk/releases/tag/Dataverse-v1.0.0 +``` + +### Root CHANGELOG.md + +Convert to an index/overview file: + +```markdown +# PPDS SDK Changelog Index + +This repository contains multiple packages. 
See per-package changelogs: + +- [PPDS.Plugins](src/PPDS.Plugins/CHANGELOG.md) - Plugin attributes for Dataverse +- [PPDS.Dataverse](src/PPDS.Dataverse/CHANGELOG.md) - High-performance Dataverse connectivity +- [PPDS.Migration](src/PPDS.Migration/CHANGELOG.md) - Migration library and CLI tool + +For GitHub Releases with full release notes, see: +https://github.com/joshsmithxrm/ppds-sdk/releases +``` + +--- + +## Documentation Updates + +### sdk/CLAUDE.md Changes + +#### Update Project Structure Section + +```markdown +## 📁 Project Structure + +``` +ppds-sdk/ +├── src/ +│ ├── PPDS.Plugins/ +│ │ ├── CHANGELOG.md # Package changelog +│ │ ├── Attributes/ +│ │ ├── Enums/ +│ │ ├── PPDS.Plugins.csproj +│ │ └── PPDS.Plugins.snk +│ ├── PPDS.Dataverse/ +│ │ ├── CHANGELOG.md # Package changelog +│ │ ├── BulkOperations/ +│ │ ├── Pooling/ +│ │ ├── Resilience/ +│ │ └── PPDS.Dataverse.csproj +│ ├── PPDS.Migration/ +│ │ ├── CHANGELOG.md # Package changelog (covers CLI too) +│ │ └── PPDS.Migration.csproj +│ └── PPDS.Migration.Cli/ +│ └── PPDS.Migration.Cli.csproj +├── tests/ +├── docs/ +│ ├── adr/ +│ ├── architecture/ +│ └── specs/ +├── .github/workflows/ +│ ├── build.yml +│ ├── test.yml +│ └── publish-nuget.yml +├── PPDS.Sdk.sln +└── CHANGELOG.md # Index pointing to per-package changelogs +``` +``` + +#### Replace Version Management Section + +```markdown +## 📦 Version Management + +### MinVer (Automated Versioning) + +Versions are determined automatically from git tags using [MinVer](https://github.com/adamralph/minver). 
+ +| Package Group | Tag Prefix | Example Tag | +|---------------|------------|-------------| +| PPDS.Plugins | `Plugins-v` | `Plugins-v1.2.0` | +| PPDS.Dataverse | `Dataverse-v` | `Dataverse-v1.0.0` | +| PPDS.Migration + CLI | `Migration-v` | `Migration-v1.0.0` | + +**Pre-release versions:** +```bash +Dataverse-v1.0.0-alpha.1 # Alpha +Dataverse-v1.0.0-beta.1 # Beta +Dataverse-v1.0.0-rc.1 # Release candidate +Dataverse-v1.0.0 # Stable release +``` + +**Between tags:** MinVer auto-generates versions like `1.0.1-alpha.0.3` (3 commits after 1.0.0). + +### Major Version Sync + +Major versions stay in sync across ecosystem for compatibility: +- PPDS.Plugins 1.x, PPDS.Dataverse 1.x, PPDS.Migration 1.x = compatible +- Major bump in any package = coordinate across all packages +``` + +#### Replace Release Process Section + +```markdown +## 🚀 Release Process + +### Per-Package Release + +1. **Update package CHANGELOG** (`src/PPDS.{Package}/CHANGELOG.md`) +2. **Merge to main** +3. **Create and push tag:** + ```bash + git tag Dataverse-v1.0.0 + git push origin Dataverse-v1.0.0 + ``` +4. **Create GitHub Release:** + - Go to Releases → "Draft new release" + - Select the tag + - Copy release notes from package CHANGELOG + - Publish → CI publishes to NuGet + +### Pre-Release Workflow + +```bash +git tag Dataverse-v1.0.0-alpha.1 && git push origin Dataverse-v1.0.0-alpha.1 +git tag Dataverse-v1.0.0-beta.1 && git push origin Dataverse-v1.0.0-beta.1 +git tag Dataverse-v1.0.0-rc.1 && git push origin Dataverse-v1.0.0-rc.1 +git tag Dataverse-v1.0.0 && git push origin Dataverse-v1.0.0 +``` + +### Multi-Package Release + +To release multiple packages: +```bash +git tag Dataverse-v1.0.0 +git tag Migration-v1.0.0 +git push origin Dataverse-v1.0.0 Migration-v1.0.0 +``` +Create separate GitHub Releases for each tag. + +**Required Secret:** `NUGET_API_KEY` +``` + +#### Update Development Workflow Section + +```markdown +## 🔄 Development Workflow + +### Making Changes + +1. 
Create feature branch from `main` +2. Make changes +3. Run `dotnet build` and `dotnet test` +4. Update the relevant package CHANGELOG (`src/PPDS.{Package}/CHANGELOG.md`) +5. Create PR to `main` +``` + +#### Update Key Files Section + +```markdown +## 📋 Key Files + +| File | Purpose | +|------|---------| +| `src/PPDS.*/CHANGELOG.md` | Per-package release notes | +| `CHANGELOG.md` | Index pointing to per-package changelogs | +| `PPDS.Plugins.snk` | Strong name key (DO NOT regenerate) | +| `.editorconfig` | Code style settings | +| `docs/specs/MINVER_VERSIONING.md` | Versioning implementation spec | +``` + +#### Update NEVER Section + +Add to the NEVER table: + +```markdown +| Manually set `` in csproj | MinVer manages versions via git tags | +``` + +#### Update ALWAYS Section + +Update the CHANGELOG rule: + +```markdown +| Update package CHANGELOG with changes | Per-package release notes in `src/PPDS.{Package}/CHANGELOG.md` | +``` + +### Root CLAUDE.md (ppds/) Changes + +#### Update Versioning Section + +```markdown +## 📦 Versioning + +- All repos use SemVer +- Major versions stay in sync across ecosystem for compatibility +- Each repo has independent minor/patch versions +- **ppds-sdk uses MinVer** with per-package tags (e.g., `Plugins-v1.0.0`, `Dataverse-v1.0.0`) + +### SDK Package Tags + +| Package | Tag Format | Example | +|---------|------------|---------| +| PPDS.Plugins | `Plugins-v{version}` | `Plugins-v1.1.0` | +| PPDS.Dataverse | `Dataverse-v{version}` | `Dataverse-v1.0.0` | +| PPDS.Migration | `Migration-v{version}` | `Migration-v1.0.0` | +``` + +#### Update Coordinated Release Process + +```markdown +## 🚀 Coordinated Release Process + +When releasing a new major version across the ecosystem: + +1. **ppds-sdk** - Create per-package tags and GitHub Releases: + - `Plugins-v2.0.0` (if changed) + - `Dataverse-v2.0.0` + - `Migration-v2.0.0` +2. **ppds-tools** - Update and tag (PowerShell Gallery must publish) +3. 
**ppds-alm** - Tag (templates reference specific versions) +4. **ppds-demo** - Update to use new versions +5. **extension** - Update if needed +``` + +--- + +## Summary of Changes + +| File | Change | +|------|--------| +| `src/PPDS.Plugins/PPDS.Plugins.csproj` | Add MinVer, remove ``, add `MinVerTagPrefix` | +| `src/PPDS.Dataverse/PPDS.Dataverse.csproj` | Add MinVer, remove ``, add `MinVerTagPrefix` | +| `src/PPDS.Migration/PPDS.Migration.csproj` | Add MinVer, remove ``, add `MinVerTagPrefix` | +| `src/PPDS.Migration.Cli/PPDS.Migration.Cli.csproj` | Add MinVer, remove ``, add `MinVerTagPrefix` | +| `.github/workflows/publish-nuget.yml` | Update to filter by tag pattern | +| `CHANGELOG.md` | Convert to index pointing to per-package changelogs | +| `src/PPDS.Plugins/CHANGELOG.md` | **NEW** - Per-package changelog | +| `src/PPDS.Dataverse/CHANGELOG.md` | **NEW** - Per-package changelog | +| `src/PPDS.Migration/CHANGELOG.md` | **NEW** - Per-package changelog (covers CLI) | +| `CLAUDE.md` (sdk) | Update Version Management, Release Process, Project Structure, Key Files, NEVER/ALWAYS rules | +| `CLAUDE.md` (root ppds) | Update Versioning section with SDK tag formats | + +--- + +## Migration Checklist + +### Pre-Implementation +- [ ] Review and approve this spec + +### Implementation (Single PR) +- [ ] Add MinVer to all publishable .csproj files +- [ ] Remove hardcoded `` elements +- [ ] Update `.github/workflows/publish-nuget.yml` +- [ ] Create per-package CHANGELOG.md files (migrate content from root) +- [ ] Convert root CHANGELOG.md to index +- [ ] Update sdk/CLAUDE.md +- [ ] Update root ppds/CLAUDE.md +- [ ] Test locally: `dotnet pack` shows correct versions + +### Post-Merge +- [ ] Create new tags at appropriate commits: + - `Plugins-v1.1.0` at same commit as old `v1.1.0` + - `Dataverse-v1.0.0` at HEAD + - `Migration-v1.0.0` at HEAD +- [ ] Delete old `v1.1.0` tag +- [ ] Create GitHub Releases for each tag +- [ ] Verify NuGet packages published correctly + +--- + +## 
References + +- [MinVer GitHub](https://github.com/adamralph/minver) +- [MinVer Documentation](https://github.com/adamralph/minver#readme) +- [Semantic Versioning](https://semver.org/) +- [Keep a Changelog](https://keepachangelog.com/) diff --git a/docs/specs/THROTTLE_DETECTION_AND_ROUTING.md b/docs/specs/THROTTLE_DETECTION_AND_ROUTING.md new file mode 100644 index 00000000..00c29a3b --- /dev/null +++ b/docs/specs/THROTTLE_DETECTION_AND_ROUTING.md @@ -0,0 +1,588 @@ +# Specification: Throttle Detection and Intelligent Routing + +**Status:** Draft +**Author:** Claude Code +**Date:** 2025-12-22 +**Priority:** High + +--- + +## Problem Statement + +The PPDS.Dataverse SDK has infrastructure for throttle-aware connection routing (`ThrottleTracker`, `ThrottleAwareStrategy`, `ServiceProtectionException`) but this infrastructure is not wired up. The `RecordThrottle()` method is never called, making `ThrottleAwareStrategy` non-functional. + +When service protection limits are hit: +- The SDK doesn't detect the throttle event +- The SDK doesn't record which connection was throttled +- The SDK doesn't route future requests away from throttled connections +- Operators have no visibility into throttling behavior + +This defeats the purpose of multi-user connection pooling, where the goal is to route away from throttled Application Users to maximize throughput. 
+
+---
+
+## Microsoft's Guidance
+
+### Service Protection Limits (Per User, Per 5-Minute Sliding Window)
+
+| Limit | Threshold | Error Code |
+|-------|-----------|------------|
+| Requests | 6,000 | `-2147015902` (`0x80072322`) |
+| Execution time | 20 minutes (1,200,000 ms) | `-2147015903` (`0x80072321`) |
+| Concurrent requests | 52 | `-2147015898` (`0x80072326`) |
+
+### How Throttle Errors Are Returned
+
+**SDK (.NET):**
+```csharp
+// Thrown as FaultException<OrganizationServiceFault>
+// Retry-After is in ErrorDetails collection
+TimeSpan retryAfter = (TimeSpan)fault.ErrorDetails["Retry-After"];
+int errorCode = fault.ErrorCode;
+```
+
+### Microsoft's Recommended Approach
+
+From [Service protection API limits](https://learn.microsoft.com/en-us/power-apps/developer/data-platform/api-limits):
+
+> "Backing off requests using the Retry-After delay is the fastest way to recover from throttling."
+
+> "If you aren't getting some service protection limit errors, you haven't maximized your application's capability."
+
+**Key insight:** Throttling is expected at maximum throughput. The goal is not to avoid throttling but to handle it correctly. 
+ +### References + +- [Service protection API limits](https://learn.microsoft.com/en-us/power-apps/developer/data-platform/api-limits) +- [Retry operations](https://learn.microsoft.com/en-us/power-apps/developer/data-platform/api-limits#retry-operations) +- [Optimize performance for bulk operations](https://learn.microsoft.com/en-us/power-apps/developer/data-platform/optimize-performance-create-update) + +--- + +## Current State + +### Existing Components + +| Component | Location | Status | +|-----------|----------|--------| +| `ServiceProtectionException` | `Resilience/ServiceProtectionException.cs` | Complete - has error codes, `IsServiceProtectionError()` | +| `ThrottleTracker` | `Resilience/ThrottleTracker.cs` | Complete - can record throttles, check if throttled | +| `IThrottleTracker` | `Resilience/IThrottleTracker.cs` | Complete - interface defined | +| `ThrottleState` | `Resilience/ThrottleState.cs` | Complete - tracks expiry | +| `ThrottleAwareStrategy` | `Pooling/Strategies/ThrottleAwareStrategy.cs` | Complete - routes away from throttled | +| `PoolStatistics` | `Pooling/PoolStatistics.cs` | Partial - has `ThrottledConnections` but not populated | + +### What's Missing + +1. **Detection:** `BulkOperationExecutor` catches `OrganizationServiceFault` for `Plugin.BulkApiErrorDetails` but doesn't check for service protection errors. + +2. **Recording:** `RecordThrottle()` is never called anywhere in the codebase. + +3. **Retry Logic:** No retry-after-throttle logic in `BulkOperationExecutor`. + +4. **Statistics:** `PoolStatistics.ThrottledConnections` is not populated. + +--- + +## Proposed Solution + +### Architecture + +``` +┌─────────────────────────────────────────────────────────────────────┐ +│ BulkOperationExecutor │ +├─────────────────────────────────────────────────────────────────────┤ +│ ExecuteBatchAsync() │ +│ ┌─────────────────────────────────────────────────────────────┐ │ +│ │ 1. Get connection from pool │ │ +│ │ 2. 
Execute request │ │ +│ │ 3. On success → return result │ │ +│ │ 4. On FaultException: │ │ +│ │ a. Check IsServiceProtectionError(errorCode) │ │ +│ │ b. If YES: │ │ +│ │ - Extract Retry-After from ErrorDetails │ │ +│ │ - Call throttleTracker.RecordThrottle() │ │ +│ │ - If other connections available → retry immediately │ │ +│ │ - If all throttled → wait shortest expiry → retry │ │ +│ │ c. If NO: │ │ +│ │ - Handle as regular error │ │ +│ └─────────────────────────────────────────────────────────────┘ │ +└─────────────────────────────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────────────┐ +│ IThrottleTracker │ +├─────────────────────────────────────────────────────────────────────┤ +│ RecordThrottle(connectionName, retryAfter) │ +│ IsThrottled(connectionName) → bool │ +│ GetThrottleExpiry(connectionName) → DateTime? │ +│ TotalThrottleEvents → long │ +└─────────────────────────────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────────────┐ +│ ThrottleAwareStrategy │ +├─────────────────────────────────────────────────────────────────────┤ +│ SelectConnection(): │ +│ - Filter out connections where IsThrottled() == true │ +│ - If none available, return one with shortest expiry │ +│ - Return least-recently-used among available │ +└─────────────────────────────────────────────────────────────────────┘ +``` + +### Retry Strategy + +**Priority Order:** +1. **Try a different connection immediately** - If AppUser1 is throttled but AppUser2 is available, switch immediately without waiting. +2. **Wait only when all connections are throttled** - Find the shortest `Retry-After` across all connections and wait that duration. +3. **Respect maximum retries** - Don't retry indefinitely. After N attempts, throw `ServiceProtectionException` to let the consumer decide. 
+
+**Why this order:**
+- Maximizes throughput by using available quota immediately
+- Only waits when absolutely necessary
+- Prevents infinite retry loops
+
+---
+
+## Implementation Details
+
+### 1. Add IThrottleTracker to BulkOperationExecutor
+
+**File:** `BulkOperations/BulkOperationExecutor.cs`
+
+```csharp
+public sealed class BulkOperationExecutor : IBulkOperationExecutor
+{
+    private readonly IDataverseConnectionPool _connectionPool;
+    private readonly IThrottleTracker _throttleTracker;  // ADD
+    private readonly DataverseOptions _options;
+    private readonly ILogger<BulkOperationExecutor> _logger;
+
+    public BulkOperationExecutor(
+        IDataverseConnectionPool connectionPool,
+        IThrottleTracker throttleTracker,  // ADD
+        IOptions<DataverseOptions> options,
+        ILogger<BulkOperationExecutor> logger)
+    {
+        _connectionPool = connectionPool ?? throw new ArgumentNullException(nameof(connectionPool));
+        _throttleTracker = throttleTracker ?? throw new ArgumentNullException(nameof(throttleTracker));
+        _options = options?.Value ?? throw new ArgumentNullException(nameof(options));
+        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
+    }
+}
+```
+
+### 2. Add Throttle Detection Helper
+
+**File:** `BulkOperations/BulkOperationExecutor.cs`
+
+```csharp
+/// <summary>
+/// Checks if an exception is a service protection throttle and extracts Retry-After.
+/// </summary>
+/// <param name="exception">The exception to check.</param>
+/// <param name="retryAfter">The Retry-After duration if throttled.</param>
+/// <param name="errorCode">The error code if throttled.</param>
+/// <returns>True if this is a service protection error.</returns>
+private static bool TryGetThrottleInfo( + Exception exception, + out TimeSpan retryAfter, + out int errorCode) +{ + retryAfter = TimeSpan.Zero; + errorCode = 0; + + if (exception is not FaultException faultEx) + { + return false; + } + + var fault = faultEx.Detail; + errorCode = fault.ErrorCode; + + if (!ServiceProtectionException.IsServiceProtectionError(errorCode)) + { + return false; + } + + // Extract Retry-After from ErrorDetails + if (fault.ErrorDetails.TryGetValue("Retry-After", out var retryAfterObj) + && retryAfterObj is TimeSpan retryAfterSpan) + { + retryAfter = retryAfterSpan; + } + else + { + // Fallback if Retry-After not provided (shouldn't happen, but be safe) + retryAfter = TimeSpan.FromSeconds(30); + _logger.LogWarning( + "Service protection error without Retry-After. Using fallback: {Fallback}s", + retryAfter.TotalSeconds); + } + + return true; +} +``` + +### 3. Add Throttle-Aware Execution Wrapper + +**File:** `BulkOperations/BulkOperationExecutor.cs` + +```csharp +/// +/// Executes an operation with throttle detection, recording, and intelligent retry. +/// +private async Task ExecuteWithThrottleHandlingAsync( + Func> operation, + int maxRetries, + CancellationToken cancellationToken) +{ + var attempts = 0; + Exception? lastException = null; + + while (attempts < maxRetries) + { + attempts++; + IDataverseClient? client = null; + string? connectionName = null; + + try + { + client = await _connectionPool.GetClientAsync(cancellationToken: cancellationToken); + connectionName = client.ConnectionName; + + return await operation(client, cancellationToken); + } + catch (Exception ex) when (TryGetThrottleInfo(ex, out var retryAfter, out var errorCode)) + { + lastException = ex; + + // Record the throttle event + if (connectionName != null) + { + _throttleTracker.RecordThrottle(connectionName, retryAfter); + } + + _logger.LogWarning( + "Service protection limit hit. 
Connection: {Connection}, ErrorCode: {ErrorCode}, " + + "RetryAfter: {RetryAfter}, Attempt: {Attempt}/{MaxRetries}", + connectionName, errorCode, retryAfter, attempts, maxRetries); + + // Check if other connections are available + if (HasAvailableConnections()) + { + _logger.LogDebug("Other connections available. Retrying immediately on different connection."); + continue; // Retry immediately with different connection + } + + // All connections throttled - wait for shortest expiry + var waitTime = GetShortestThrottleExpiry(); + if (waitTime > TimeSpan.Zero && attempts < maxRetries) + { + _logger.LogInformation( + "All connections throttled. Waiting {WaitTime} before retry.", + waitTime); + await Task.Delay(waitTime, cancellationToken); + } + } + finally + { + if (client is IAsyncDisposable asyncDisposable) + { + await asyncDisposable.DisposeAsync(); + } + else if (client is IDisposable disposable) + { + disposable.Dispose(); + } + } + } + + // Max retries exceeded + throw new ServiceProtectionException( + "unknown", + TimeSpan.Zero, + 0, + lastException ?? new InvalidOperationException("Max retries exceeded")); +} + +/// +/// Checks if any connections are available (not throttled). +/// +private bool HasAvailableConnections() +{ + // This requires access to connection names from the pool + // Implementation depends on pool exposing this information + var stats = _connectionPool.Statistics; + return stats.ThrottledConnections < stats.TotalConnections; +} + +/// +/// Gets the shortest time until a throttled connection becomes available. +/// +private TimeSpan GetShortestThrottleExpiry() +{ + // This requires IThrottleTracker to expose GetShortestExpiry() + // Or iterate through connections and check each + // For now, return a reasonable default + return TimeSpan.FromSeconds(30); +} +``` + +### 4. 
Update IThrottleTracker Interface + +**File:** `Resilience/IThrottleTracker.cs` + +Add methods to support the retry logic: + +```csharp +/// +/// Gets the number of currently throttled connections. +/// +int ThrottledConnectionCount { get; } + +/// +/// Gets all currently throttled connection names. +/// +IReadOnlyCollection ThrottledConnections { get; } + +/// +/// Gets the shortest time until any throttled connection expires. +/// Returns TimeSpan.Zero if no connections are throttled. +/// +TimeSpan GetShortestExpiry(); +``` + +### 5. Update ThrottleTracker Implementation + +**File:** `Resilience/ThrottleTracker.cs` + +```csharp +public int ThrottledConnectionCount +{ + get + { + CleanupExpired(); + return _throttleStates.Count; + } +} + +public IReadOnlyCollection ThrottledConnections +{ + get + { + CleanupExpired(); + return _throttleStates.Keys.ToList().AsReadOnly(); + } +} + +public TimeSpan GetShortestExpiry() +{ + CleanupExpired(); + + if (_throttleStates.IsEmpty) + { + return TimeSpan.Zero; + } + + var now = DateTime.UtcNow; + var shortest = _throttleStates.Values + .Select(s => s.ExpiresAt - now) + .Where(t => t > TimeSpan.Zero) + .DefaultIfEmpty(TimeSpan.Zero) + .Min(); + + return shortest; +} + +private void CleanupExpired() +{ + var now = DateTime.UtcNow; + var expired = _throttleStates + .Where(kvp => kvp.Value.ExpiresAt <= now) + .Select(kvp => kvp.Key) + .ToList(); + + foreach (var key in expired) + { + _throttleStates.TryRemove(key, out _); + } +} +``` + +### 6. Update PoolStatistics + +**File:** `Pooling/PoolStatistics.cs` + +Ensure `ThrottledConnections` is populated from `IThrottleTracker`: + +```csharp +public class PoolStatistics +{ + // ... existing properties ... + + /// + /// Gets the number of currently throttled connections. + /// + public int ThrottledConnections { get; init; } + + /// + /// Gets the total number of throttle events since pool creation. + /// + public long TotalThrottleEvents { get; init; } +} +``` + +### 7. 
Update DI Registration + +**File:** `DependencyInjection/ServiceCollectionExtensions.cs` + +Ensure `IThrottleTracker` is registered and injected: + +```csharp +services.AddSingleton(); +// Ensure BulkOperationExecutor receives IThrottleTracker +``` + +### 8. Add Configuration Options + +**File:** `Resilience/ResilienceOptions.cs` + +```csharp +public class ResilienceOptions +{ + /// + /// Maximum number of retry attempts on service protection errors. + /// Default: 3 + /// + public int MaxThrottleRetries { get; set; } = 3; + + /// + /// Whether to enable throttle tracking for connection routing. + /// Default: true + /// + public bool EnableThrottleTracking { get; set; } = true; + + /// + /// Fallback Retry-After duration when not provided by server. + /// Default: 30 seconds + /// + public TimeSpan FallbackRetryAfter { get; set; } = TimeSpan.FromSeconds(30); +} +``` + +--- + +## Integration Points + +### Where to Add Throttle Handling + +| Method | File | Change Required | +|--------|------|-----------------| +| `ExecuteCreateMultipleBatchAsync` | `BulkOperationExecutor.cs` | Wrap with throttle handling | +| `ExecuteUpdateMultipleBatchAsync` | `BulkOperationExecutor.cs` | Wrap with throttle handling | +| `ExecuteUpsertMultipleBatchAsync` | `BulkOperationExecutor.cs` | Wrap with throttle handling | +| `ExecuteElasticDeleteBatchAsync` | `BulkOperationExecutor.cs` | Wrap with throttle handling | +| `ExecuteStandardDeleteBatchAsync` | `BulkOperationExecutor.cs` | Wrap with throttle handling | + +### Connection Name Exposure + +The `IDataverseClient` interface needs to expose the connection name so we can record throttles against the correct connection: + +```csharp +public interface IDataverseClient +{ + // ... existing members ... + + /// + /// Gets the name of the connection this client belongs to. + /// + string ConnectionName { get; } +} +``` + +--- + +## Testing Requirements + +### Unit Tests + +1. 
**Throttle Detection** + - Verify `TryGetThrottleInfo` correctly identifies service protection errors + - Verify `Retry-After` extraction from `ErrorDetails` + - Verify fallback when `Retry-After` is missing + +2. **Throttle Recording** + - Verify `RecordThrottle` is called when throttle detected + - Verify correct connection name and `Retry-After` are recorded + +3. **Retry Logic** + - Verify immediate retry when other connections available + - Verify wait-and-retry when all connections throttled + - Verify max retries is respected + +4. **Statistics** + - Verify `ThrottledConnections` count is accurate + - Verify `TotalThrottleEvents` increments correctly + +### Integration Tests + +1. **Simulated Throttling** + - Mock `OrganizationServiceFault` with service protection error codes + - Verify routing switches to different connection + - Verify timing of retries matches `Retry-After` + +2. **Multi-Connection Routing** + - Configure 3 Application Users + - Throttle User1 + - Verify requests route to User2 and User3 + - Un-throttle User1 + - Verify User1 receives requests again + +### Load Testing + +1. **Push to Throttle** + - Run bulk operations until throttling occurs + - Verify throttle events are logged + - Verify statistics show throttle counts + - Verify throughput is maximized (routes away from throttled) + +--- + +## Acceptance Criteria + +1. [ ] Service protection errors (all 3 codes) are detected +2. [ ] `Retry-After` is extracted from `ErrorDetails` +3. [ ] `RecordThrottle()` is called for each throttle event +4. [ ] `ThrottleAwareStrategy` routes away from throttled connections +5. [ ] Immediate retry occurs when other connections available +6. [ ] Wait-and-retry occurs when all connections throttled +7. [ ] Max retries prevents infinite loops +8. [ ] `PoolStatistics.ThrottledConnections` shows current count +9. [ ] `PoolStatistics.TotalThrottleEvents` shows total count +10. [ ] Logging includes connection name, error code, and `Retry-After` +11. 
[ ] Throttle handling is configurable via `ResilienceOptions` +12. [ ] All unit tests pass +13. [ ] Integration tests verify multi-connection routing +14. [ ] Load test verifies throttle handling under pressure + +--- + +## Risks and Mitigations + +| Risk | Mitigation | +|------|------------| +| Double-retry (ServiceClient + ours) | ServiceClient's internal retry is for transient failures; service protection errors bubble up | +| Infinite retry loops | `MaxThrottleRetries` configuration with sensible default | +| Clock skew with `Retry-After` | Add small buffer (1-2 seconds) to wait time | +| Memory growth in `ThrottleTracker` | `CleanupExpired()` removes old entries | +| Race conditions in throttle state | Use `ConcurrentDictionary` (already implemented) | + +--- + +## Future Enhancements + +1. **Proactive Throttle Detection** - Track request rates and slow down before hitting limits +2. **Throttle Prediction** - Use historical data to predict when throttling will occur +3. **Adaptive Parallelism** - Automatically reduce `MaxDegreeOfParallelism` when throttling detected +4. **Circuit Breaker** - If a connection is repeatedly throttled, exclude it temporarily diff --git a/docs/specs/TVP_RACE_CONDITION_RETRY.md b/docs/specs/TVP_RACE_CONDITION_RETRY.md new file mode 100644 index 00000000..335dd14f --- /dev/null +++ b/docs/specs/TVP_RACE_CONDITION_RETRY.md @@ -0,0 +1,302 @@ +# TVP Race Condition Retry Specification + +**Status:** Draft +**Created:** 2025-12-22 +**Author:** Claude Code + +--- + +## Problem Statement + +When executing parallel bulk operations (`CreateMultiple`, `UpdateMultiple`, `UpsertMultiple`) against a **newly created Dataverse table**, a transient SQL error occurs due to an internal race condition in Dataverse's lazy initialization of bulk operation infrastructure. 
+ +### Error Details + +``` +ErrorCode: 0x80044150 +CRM ErrorCode: -2147204784 +SQL ErrorCode: -2146232060 +SQL Number: 3732 + +Message: Cannot drop type 'ppds_ZipCodeBase_tvp' because it is being +referenced by object 'p_ppds_ZipCodeBase_UpdateMultiple'. There may be +other objects that reference this type. +``` + +### Root Cause + +1. Dataverse **lazily creates** internal SQL objects for bulk operations: + - Table-valued parameter types (TVPs): `{entity}_tvp` + - Stored procedures: `p_{entity}_CreateMultiple`, `p_{entity}_UpdateMultiple` + +2. When multiple parallel requests hit a table **before** these objects exist: + - Thread A creates the TVP + - Thread B creates the stored procedure referencing the TVP + - Thread A detects a schema mismatch, attempts to drop and recreate the TVP + - SQL Server rejects the drop because Thread B's stored procedure references it + +3. This is a **transient error** - subsequent requests succeed because the objects are now created. + +### Impact + +- First batch of parallel bulk operations fails (100 records marked as failed) +- Remaining batches succeed normally +- Error is **self-healing** but causes unnecessary failures and reduced throughput +- Particularly affects: + - Fresh table deployments + - CI/CD pipelines that recreate tables + - Development environments with frequent schema changes + +--- + +## Proposed Solution + +Add retry logic specifically for SQL error 3732 (TVP dependency conflict) to `BulkOperationExecutor`, following the existing pattern used for pool exhaustion retries. + +### Approach: Targeted Retry with Backoff + +``` +┌─────────────────────────────────────────────────────────────────┐ +│ Batch Execution Flow │ +└─────────────────────────────────────────────────────────────────┘ + + ExecuteBatch() + │ + ▼ + ┌─────────┐ + │ Execute │──success──▶ Return Result + │ Request │ + └────┬────┘ + │ + error + │ + ▼ + ┌─────────────────┐ + │ Is SQL 3732? 
│──no──▶ Propagate Error (existing behavior) + │ (TVP conflict) │ + └────────┬────────┘ + │ + yes + │ + ▼ + ┌─────────────────┐ + │ Retry < Max? │──no──▶ Propagate Error + └────────┬────────┘ + │ + yes + │ + ▼ + ┌─────────────────┐ + │ Wait (backoff) │ + │ 500ms, 1s, 2s │ + └────────┬────────┘ + │ + ▼ + Retry Execute +``` + +### Detection Logic + +```csharp +private static bool IsTvpRaceConditionError(Exception ex) +{ + // Check for FaultException with specific error codes + if (ex is FaultException fault) + { + // CRM ErrorCode: 0x80044150 (-2147204784) = Generic SQL error wrapper + // SQL Number: 3732 = Cannot drop type because referenced + var message = fault.Detail?.Message ?? ex.Message; + + return fault.Detail?.ErrorCode == unchecked((int)0x80044150) + && (message.Contains("3732") || message.Contains("Cannot drop type")); + } + return false; +} +``` + +### Retry Parameters + +| Parameter | Value | Rationale | +|-----------|-------|-----------| +| Max retries | 3 | TVP creation is fast; 3 attempts sufficient | +| Initial delay | 500ms | Allow other threads to complete TVP creation | +| Backoff multiplier | 2x | 500ms → 1s → 2s | +| Max delay | 2s | Don't wait too long for infrastructure issue | + +### Implementation Location + +Modify `BulkOperationExecutor.cs`: + +1. Add new constant: + ```csharp + private const int MaxTvpRetries = 3; + ``` + +2. Add detection method: + ```csharp + private static bool IsTvpRaceConditionError(Exception ex) + ``` + +3. 
Modify batch execution methods to wrap with retry: + - `ExecuteCreateMultipleBatchAsync` + - `ExecuteUpdateMultipleBatchAsync` + - `ExecuteUpsertMultipleBatchAsync` + +### Example Implementation + +```csharp +private async Task ExecuteWithTvpRetryAsync( + Func> operation, + string entityLogicalName, + int batchSize, + CancellationToken cancellationToken) +{ + for (int attempt = 1; attempt <= MaxTvpRetries; attempt++) + { + try + { + return await operation(); + } + catch (Exception ex) when (IsTvpRaceConditionError(ex) && attempt < MaxTvpRetries) + { + var delay = TimeSpan.FromMilliseconds(500 * Math.Pow(2, attempt - 1)); + _logger.LogWarning( + "TVP race condition detected for {Entity}, retrying in {Delay}ms (attempt {Attempt}/{Max})", + entityLogicalName, delay.TotalMilliseconds, attempt, MaxTvpRetries); + + await Task.Delay(delay, cancellationToken); + } + } + + // Unreachable: final attempt either succeeds or throws + throw new InvalidOperationException("Unexpected code path"); +} +``` + +--- + +## Alternatives Considered + +### Alternative A: Single-Batch Warmup + +Execute a single small batch sequentially before parallel execution to trigger TVP creation. + +```csharp +// Before parallel execution +if (batches.Count > 1) +{ + var warmupBatch = batches[0]; + await ExecuteBatchAsync(warmupBatch, cancellationToken); + batches = batches.Skip(1).ToList(); +} +// Then execute remaining in parallel +``` + +**Pros:** +- Prevents the error entirely +- Simpler logic (no retry handling) + +**Cons:** +- Adds latency to every bulk operation (not just new tables) +- Penalizes the common case to handle the rare case +- Doesn't help if table was just recreated mid-operation + +**Rejected:** Penalizes all operations for a rare edge case. + +### Alternative B: Pre-Check Table Metadata + +Query table metadata to detect if bulk operations have been used before. 
+ +**Pros:** +- Could skip warmup for established tables + +**Cons:** +- No reliable API to detect TVP existence +- Adds complexity and an extra round-trip +- Metadata could be stale + +**Rejected:** No reliable detection mechanism exists. + +### Alternative C: Document and Accept + +Document the behavior and let callers handle retries. + +**Pros:** +- No code changes +- Users have full control + +**Cons:** +- Poor developer experience +- Inconsistent with existing retry patterns (pool exhaustion) +- Every consumer must implement retry logic + +**Rejected:** Violates principle of hiding infrastructure complexity. + +--- + +## Testing Strategy + +### Unit Tests + +1. **Detection test:** Verify `IsTvpRaceConditionError` correctly identifies the error +2. **Retry test:** Mock the error, verify retry with backoff +3. **Max retry test:** Verify error propagates after max attempts +4. **Success after retry:** Verify operation completes when retry succeeds + +### Integration Tests + +1. **New table test:** Create table, immediately run parallel bulk operation +2. **Existing table test:** Verify no unnecessary retries on established tables +3. **Concurrent test:** Multiple parallel operations on new table + +### Manual Validation + +Use the existing demo application: +```powershell +# Recreate schema and immediately load data +dotnet run -- create-geo-schema --delete-first +dotnet run -- load-geo-data --verbose +``` + +Expected: No failures logged for TVP race condition. + +--- + +## Logging + +### New Log Messages + +| Level | Event | Message | +|-------|-------|---------| +| Warning | TVP retry | `TVP race condition detected for {Entity}, retrying in {Delay}ms (attempt {Attempt}/{Max})` | +| Debug | TVP success | `TVP retry succeeded for {Entity} on attempt {Attempt}` | +| Error | TVP exhausted | `TVP race condition persisted after {Max} retries for {Entity}` | + +--- + +## Rollout Plan + +1. Implement in `feature/tvp-retry` branch +2. Add unit tests +3. 
Validate with demo application +4. Update CHANGELOG.md +5. PR to main +6. Include in next minor release + +--- + +## Success Criteria + +- [ ] Zero TVP race condition failures in demo application +- [ ] No performance regression for established tables +- [ ] All existing tests pass +- [ ] New unit tests cover retry logic +- [ ] Documentation updated + +--- + +## References + +- [Dataverse Bulk Operations](https://learn.microsoft.com/en-us/power-apps/developer/data-platform/bulk-operations) +- [SQL Server Error 3732](https://learn.microsoft.com/en-us/sql/relational-databases/errors-events/database-engine-events-and-errors) - "Cannot drop type because it is being referenced" +- Existing pattern: `BulkOperationExecutor.GetClientWithRetryAsync()` (lines 371-394) diff --git a/src/PPDS.Dataverse/BulkOperations/BulkOperationExecutor.cs b/src/PPDS.Dataverse/BulkOperations/BulkOperationExecutor.cs index 27f9d5b5..35a96eff 100644 --- a/src/PPDS.Dataverse/BulkOperations/BulkOperationExecutor.cs +++ b/src/PPDS.Dataverse/BulkOperations/BulkOperationExecutor.cs @@ -1,7 +1,11 @@ using System; +using System.Collections.Concurrent; using System.Collections.Generic; using System.Diagnostics; using System.Linq; +using System.Net.Http; +using System.Net.Sockets; +using System.ServiceModel; using System.Threading; using System.Threading.Tasks; using Microsoft.Extensions.Logging; @@ -11,6 +15,9 @@ using Newtonsoft.Json; using PPDS.Dataverse.DependencyInjection; using PPDS.Dataverse.Pooling; +using PPDS.Dataverse.Progress; +using PPDS.Dataverse.Resilience; +using PPDS.Dataverse.Security; namespace PPDS.Dataverse.BulkOperations { @@ -20,6 +27,31 @@ namespace PPDS.Dataverse.BulkOperations /// public sealed class BulkOperationExecutor : IBulkOperationExecutor { + /// + /// Maximum number of retries when connection pool is exhausted. + /// + private const int MaxPoolExhaustionRetries = 3; + + /// + /// Maximum number of retries for TVP race condition errors on new tables. 
+ /// + private const int MaxTvpRetries = 3; + + /// + /// Maximum number of retries for SQL deadlock errors. + /// + private const int MaxDeadlockRetries = 3; + + /// + /// CRM error code for generic SQL error wrapper that may contain TVP race condition. + /// + private const int SqlErrorCode = unchecked((int)0x80044150); + + /// + /// Fallback Retry-After duration when not provided by the server. + /// + private static readonly TimeSpan FallbackRetryAfter = TimeSpan.FromSeconds(30); + private readonly IDataverseConnectionPool _connectionPool; private readonly DataverseOptions _options; private readonly ILogger _logger; @@ -28,63 +60,138 @@ public sealed class BulkOperationExecutor : IBulkOperationExecutor /// Initializes a new instance of the class. /// /// The connection pool. + /// The throttle tracker. No longer used - pool handles throttle recording via PooledClient callback. Parameter kept for backwards compatibility. /// Configuration options. /// Logger instance. public BulkOperationExecutor( IDataverseConnectionPool connectionPool, + IThrottleTracker throttleTracker, IOptions options, ILogger logger) { _connectionPool = connectionPool ?? throw new ArgumentNullException(nameof(connectionPool)); + // throttleTracker parameter kept for backwards compatibility - pool now handles throttle recording + _ = throttleTracker ?? throw new ArgumentNullException(nameof(throttleTracker)); _options = options?.Value ?? throw new ArgumentNullException(nameof(options)); _logger = logger ?? throw new ArgumentNullException(nameof(logger)); } + /// + /// Resolves the parallelism to use for batch processing. + /// Uses the explicit value if provided, otherwise queries the ServiceClient's RecommendedDegreesOfParallelism. + /// + private async Task ResolveParallelismAsync(int? 
maxParallelBatches, CancellationToken cancellationToken) + { + int parallelism; + + if (maxParallelBatches.HasValue) + { + parallelism = maxParallelBatches.Value; + } + else + { + // Get RecommendedDegreesOfParallelism from a connection + await using var client = await _connectionPool.GetClientAsync(cancellationToken: cancellationToken); + var recommended = client.RecommendedDegreesOfParallelism; + + if (recommended > 0) + { + _logger.LogDebug("Using RecommendedDegreesOfParallelism: {Parallelism}", recommended); + parallelism = recommended; + } + else + { + _logger.LogWarning("RecommendedDegreesOfParallelism unavailable or zero, using sequential processing"); + return 1; + } + } + + // Cap parallelism to pool size - can't run more parallel operations than available connections + var maxPoolSize = _options.Pool.MaxPoolSize; + if (parallelism > maxPoolSize) + { + _logger.LogWarning( + "MaxParallelBatches ({Parallelism}) exceeds MaxPoolSize ({MaxPoolSize}). " + + "Capping parallelism to {MaxPoolSize}. " + + "Consider increasing MaxPoolSize for optimal throughput.", + parallelism, maxPoolSize, maxPoolSize); + parallelism = maxPoolSize; + } + + return parallelism; + } + /// public async Task CreateMultipleAsync( string entityLogicalName, IEnumerable entities, BulkOperationOptions? options = null, + IProgress? progress = null, CancellationToken cancellationToken = default) { options ??= _options.BulkOperations; var entityList = entities.ToList(); + var parallelism = await ResolveParallelismAsync(options.MaxParallelBatches, cancellationToken); - _logger.LogInformation("CreateMultiple starting. Entity: {Entity}, Count: {Count}, ElasticTable: {ElasticTable}", - entityLogicalName, entityList.Count, options.ElasticTable); + _logger.LogInformation( + "CreateMultiple starting. 
Entity: {Entity}, Count: {Count}, ElasticTable: {ElasticTable}, Parallel: {Parallel}", + entityLogicalName, entityList.Count, options.ElasticTable, parallelism); var stopwatch = Stopwatch.StartNew(); - var allCreatedIds = new List(); - var allErrors = new List(); - var successCount = 0; + var batches = Batch(entityList, options.BatchSize).ToList(); + var tracker = new ProgressTracker(entityList.Count); - foreach (var batch in Batch(entityList, options.BatchSize)) + BulkOperationResult result; + if (parallelism > 1 && batches.Count > 1) + { + result = await ExecuteBatchesParallelAsync( + batches, + (batch, ct) => ExecuteCreateMultipleBatchAsync(entityLogicalName, batch, options, ct), + parallelism, + tracker, + progress, + cancellationToken); + } + else { - var batchResult = await ExecuteCreateMultipleBatchAsync( - entityLogicalName, batch, options, cancellationToken); + var allCreatedIds = new List(); + var allErrors = new List(); + var successCount = 0; - successCount += batchResult.SuccessCount; - allErrors.AddRange(batchResult.Errors); - if (batchResult.CreatedIds != null) + foreach (var batch in batches) { - allCreatedIds.AddRange(batchResult.CreatedIds); + var batchResult = await ExecuteCreateMultipleBatchAsync( + entityLogicalName, batch, options, cancellationToken); + + successCount += batchResult.SuccessCount; + allErrors.AddRange(batchResult.Errors); + if (batchResult.CreatedIds != null) + { + allCreatedIds.AddRange(batchResult.CreatedIds); + } + + tracker.RecordProgress(batchResult.SuccessCount, batchResult.FailureCount); + progress?.Report(tracker.GetSnapshot()); } + + result = new BulkOperationResult + { + SuccessCount = successCount, + FailureCount = allErrors.Count, + Errors = allErrors, + Duration = stopwatch.Elapsed, + CreatedIds = allCreatedIds.Count > 0 ? allCreatedIds : null + }; } stopwatch.Stop(); + result = result with { Duration = stopwatch.Elapsed }; _logger.LogInformation( "CreateMultiple completed. 
Entity: {Entity}, Success: {Success}, Failed: {Failed}, Duration: {Duration}ms", - entityLogicalName, successCount, allErrors.Count, stopwatch.ElapsedMilliseconds); + entityLogicalName, result.SuccessCount, result.FailureCount, stopwatch.ElapsedMilliseconds); - return new BulkOperationResult - { - SuccessCount = successCount, - FailureCount = allErrors.Count, - Errors = allErrors, - Duration = stopwatch.Elapsed, - CreatedIds = allCreatedIds.Count > 0 ? allCreatedIds : null - }; + return result; } /// @@ -92,40 +199,66 @@ public async Task UpdateMultipleAsync( string entityLogicalName, IEnumerable entities, BulkOperationOptions? options = null, + IProgress? progress = null, CancellationToken cancellationToken = default) { options ??= _options.BulkOperations; var entityList = entities.ToList(); + var parallelism = await ResolveParallelismAsync(options.MaxParallelBatches, cancellationToken); - _logger.LogInformation("UpdateMultiple starting. Entity: {Entity}, Count: {Count}, ElasticTable: {ElasticTable}", - entityLogicalName, entityList.Count, options.ElasticTable); + _logger.LogInformation( + "UpdateMultiple starting. 
Entity: {Entity}, Count: {Count}, ElasticTable: {ElasticTable}, Parallel: {Parallel}", + entityLogicalName, entityList.Count, options.ElasticTable, parallelism); var stopwatch = Stopwatch.StartNew(); - var allErrors = new List(); - var successCount = 0; + var batches = Batch(entityList, options.BatchSize).ToList(); + var tracker = new ProgressTracker(entityList.Count); - foreach (var batch in Batch(entityList, options.BatchSize)) + BulkOperationResult result; + if (parallelism > 1 && batches.Count > 1) { - var batchResult = await ExecuteUpdateMultipleBatchAsync( - entityLogicalName, batch, options, cancellationToken); + result = await ExecuteBatchesParallelAsync( + batches, + (batch, ct) => ExecuteUpdateMultipleBatchAsync(entityLogicalName, batch, options, ct), + parallelism, + tracker, + progress, + cancellationToken); + } + else + { + var allErrors = new List(); + var successCount = 0; - successCount += batchResult.SuccessCount; - allErrors.AddRange(batchResult.Errors); + foreach (var batch in batches) + { + var batchResult = await ExecuteUpdateMultipleBatchAsync( + entityLogicalName, batch, options, cancellationToken); + + successCount += batchResult.SuccessCount; + allErrors.AddRange(batchResult.Errors); + + tracker.RecordProgress(batchResult.SuccessCount, batchResult.FailureCount); + progress?.Report(tracker.GetSnapshot()); + } + + result = new BulkOperationResult + { + SuccessCount = successCount, + FailureCount = allErrors.Count, + Errors = allErrors, + Duration = stopwatch.Elapsed + }; } stopwatch.Stop(); + result = result with { Duration = stopwatch.Elapsed }; _logger.LogInformation( "UpdateMultiple completed. 
Entity: {Entity}, Success: {Success}, Failed: {Failed}, Duration: {Duration}ms", - entityLogicalName, successCount, allErrors.Count, stopwatch.ElapsedMilliseconds); + entityLogicalName, result.SuccessCount, result.FailureCount, stopwatch.ElapsedMilliseconds); - return new BulkOperationResult - { - SuccessCount = successCount, - FailureCount = allErrors.Count, - Errors = allErrors, - Duration = stopwatch.Elapsed - }; + return result; } /// @@ -133,40 +266,66 @@ public async Task UpsertMultipleAsync( string entityLogicalName, IEnumerable entities, BulkOperationOptions? options = null, + IProgress? progress = null, CancellationToken cancellationToken = default) { options ??= _options.BulkOperations; var entityList = entities.ToList(); + var parallelism = await ResolveParallelismAsync(options.MaxParallelBatches, cancellationToken); - _logger.LogInformation("UpsertMultiple starting. Entity: {Entity}, Count: {Count}, ElasticTable: {ElasticTable}", - entityLogicalName, entityList.Count, options.ElasticTable); + _logger.LogInformation( + "UpsertMultiple starting. 
Entity: {Entity}, Count: {Count}, ElasticTable: {ElasticTable}, Parallel: {Parallel}", + entityLogicalName, entityList.Count, options.ElasticTable, parallelism); var stopwatch = Stopwatch.StartNew(); - var allErrors = new List(); - var successCount = 0; + var batches = Batch(entityList, options.BatchSize).ToList(); + var tracker = new ProgressTracker(entityList.Count); - foreach (var batch in Batch(entityList, options.BatchSize)) + BulkOperationResult result; + if (parallelism > 1 && batches.Count > 1) + { + result = await ExecuteBatchesParallelAsync( + batches, + (batch, ct) => ExecuteUpsertMultipleBatchAsync(entityLogicalName, batch, options, ct), + parallelism, + tracker, + progress, + cancellationToken); + } + else { - var batchResult = await ExecuteUpsertMultipleBatchAsync( - entityLogicalName, batch, options, cancellationToken); + var allErrors = new List(); + var successCount = 0; + + foreach (var batch in batches) + { + var batchResult = await ExecuteUpsertMultipleBatchAsync( + entityLogicalName, batch, options, cancellationToken); - successCount += batchResult.SuccessCount; - allErrors.AddRange(batchResult.Errors); + successCount += batchResult.SuccessCount; + allErrors.AddRange(batchResult.Errors); + + tracker.RecordProgress(batchResult.SuccessCount, batchResult.FailureCount); + progress?.Report(tracker.GetSnapshot()); + } + + result = new BulkOperationResult + { + SuccessCount = successCount, + FailureCount = allErrors.Count, + Errors = allErrors, + Duration = stopwatch.Elapsed + }; } stopwatch.Stop(); + result = result with { Duration = stopwatch.Elapsed }; _logger.LogInformation( "UpsertMultiple completed. 
Entity: {Entity}, Success: {Success}, Failed: {Failed}, Duration: {Duration}ms", - entityLogicalName, successCount, allErrors.Count, stopwatch.ElapsedMilliseconds); + entityLogicalName, result.SuccessCount, result.FailureCount, stopwatch.ElapsedMilliseconds); - return new BulkOperationResult - { - SuccessCount = successCount, - FailureCount = allErrors.Count, - Errors = allErrors, - Duration = stopwatch.Elapsed - }; + return result; } /// @@ -174,58 +333,530 @@ public async Task DeleteMultipleAsync( string entityLogicalName, IEnumerable ids, BulkOperationOptions? options = null, + IProgress? progress = null, CancellationToken cancellationToken = default) { options ??= _options.BulkOperations; var idList = ids.ToList(); + var parallelism = await ResolveParallelismAsync(options.MaxParallelBatches, cancellationToken); - _logger.LogInformation("DeleteMultiple starting. Entity: {Entity}, Count: {Count}, ElasticTable: {ElasticTable}", - entityLogicalName, idList.Count, options.ElasticTable); + _logger.LogInformation( + "DeleteMultiple starting. Entity: {Entity}, Count: {Count}, ElasticTable: {ElasticTable}, Parallel: {Parallel}", + entityLogicalName, idList.Count, options.ElasticTable, parallelism); var stopwatch = Stopwatch.StartNew(); - var allErrors = new List(); - var successCount = 0; + var batches = Batch(idList, options.BatchSize).ToList(); + var tracker = new ProgressTracker(idList.Count); + + // Select the appropriate batch execution function based on table type + Func, CancellationToken, Task> executeBatch = options.ElasticTable + ? 
(batch, ct) => ExecuteElasticDeleteBatchAsync(entityLogicalName, batch, options, ct) + : (batch, ct) => ExecuteStandardDeleteBatchAsync(entityLogicalName, batch, options, ct); - foreach (var batch in Batch(idList, options.BatchSize)) + BulkOperationResult result; + if (parallelism > 1 && batches.Count > 1) { - BulkOperationResult batchResult; - if (options.ElasticTable) - { - batchResult = await ExecuteElasticDeleteBatchAsync( - entityLogicalName, batch, options, cancellationToken); - } - else + result = await ExecuteBatchesParallelAsync(batches, executeBatch, parallelism, tracker, progress, cancellationToken); + } + else + { + var allErrors = new List(); + var successCount = 0; + + foreach (var batch in batches) { - batchResult = await ExecuteStandardDeleteBatchAsync( - entityLogicalName, batch, options, cancellationToken); + var batchResult = await executeBatch(batch, cancellationToken); + successCount += batchResult.SuccessCount; + allErrors.AddRange(batchResult.Errors); + + tracker.RecordProgress(batchResult.SuccessCount, batchResult.FailureCount); + progress?.Report(tracker.GetSnapshot()); } - successCount += batchResult.SuccessCount; - allErrors.AddRange(batchResult.Errors); + result = new BulkOperationResult + { + SuccessCount = successCount, + FailureCount = allErrors.Count, + Errors = allErrors, + Duration = stopwatch.Elapsed + }; } stopwatch.Stop(); + result = result with { Duration = stopwatch.Elapsed }; _logger.LogInformation( "DeleteMultiple completed. Entity: {Entity}, Success: {Success}, Failed: {Failed}, Duration: {Duration}ms", - entityLogicalName, successCount, allErrors.Count, stopwatch.ElapsedMilliseconds); + entityLogicalName, result.SuccessCount, result.FailureCount, stopwatch.ElapsedMilliseconds); - return new BulkOperationResult + return result; + } + + /// + /// Gets a connection from the pool with retry logic for pool exhaustion. + /// + /// The cancellation token. + /// A pooled client. 
+ /// Thrown when the pool remains exhausted after all retries. + private async Task GetClientWithRetryAsync(CancellationToken cancellationToken) + { + // Attempts are 1-indexed for clearer logging + for (int attempt = 1; attempt <= MaxPoolExhaustionRetries; attempt++) { - SuccessCount = successCount, - FailureCount = allErrors.Count, - Errors = allErrors, - Duration = stopwatch.Elapsed - }; + try + { + return await _connectionPool.GetClientAsync(cancellationToken: cancellationToken); + } + catch (PoolExhaustedException) when (attempt < MaxPoolExhaustionRetries) + { + // Exponential backoff: 1s, 2s before attempts 2 and 3 + var delay = TimeSpan.FromSeconds(Math.Pow(2, attempt - 1)); + _logger.LogWarning( + "Connection pool exhausted, waiting for connection (attempt {Attempt}/{MaxRetries}, delay: {Delay}s)", + attempt, MaxPoolExhaustionRetries, delay.TotalSeconds); + await Task.Delay(delay, cancellationToken); + } + // On final attempt, PoolExhaustedException propagates to caller + } + + // Unreachable: loop either returns a client or throws on final attempt + throw new InvalidOperationException("Unexpected code path in connection pool retry logic"); + } + + /// + /// Checks if an exception is a service protection throttle error and extracts the Retry-After duration. + /// + /// The exception to check. + /// The Retry-After duration if this is a throttle error. + /// The service protection error code if this is a throttle error. + /// True if this is a service protection error. 
+ private bool TryGetThrottleInfo(Exception exception, out TimeSpan retryAfter, out int errorCode) + { + retryAfter = TimeSpan.Zero; + errorCode = 0; + + // Check if this is a FaultException with OrganizationServiceFault + if (exception is not FaultException faultEx) + { + return false; + } + + var fault = faultEx.Detail; + errorCode = fault.ErrorCode; + + // Check if this is a service protection error + if (!ServiceProtectionException.IsServiceProtectionError(errorCode)) + { + return false; + } + + // Extract Retry-After from ErrorDetails + if (fault.ErrorDetails != null && + fault.ErrorDetails.TryGetValue("Retry-After", out var retryAfterObj)) + { + if (retryAfterObj is TimeSpan retryAfterSpan) + { + retryAfter = retryAfterSpan; + } + else if (retryAfterObj is int retryAfterSeconds) + { + retryAfter = TimeSpan.FromSeconds(retryAfterSeconds); + } + else if (retryAfterObj is double retryAfterDouble) + { + retryAfter = TimeSpan.FromSeconds(retryAfterDouble); + } + else + { + _logger.LogWarning( + "Unexpected Retry-After type: {Type}. Using fallback.", + retryAfterObj?.GetType().Name ?? "null"); + retryAfter = FallbackRetryAfter; + } + } + else + { + _logger.LogWarning( + "Service protection error without Retry-After. ErrorCode: {ErrorCode}. Using fallback: {Fallback}s", + errorCode, FallbackRetryAfter.TotalSeconds); + retryAfter = FallbackRetryAfter; + } + + return true; + } + + /// + /// Checks if an exception indicates an authentication/authorization failure. + /// + /// The exception to check. + /// True if this is an authentication failure. 
+ private static bool IsAuthFailure(Exception exception) + { + // Check for common auth failure patterns in FaultException + if (exception is FaultException faultEx) + { + var fault = faultEx.Detail; + + // Common auth error codes + // -2147180286: Caller does not have privilege + // -2147204720: User is disabled + // -2147180285: AccessDenied + var authErrorCodes = new[] + { + -2147180286, // No privilege + -2147204720, // User disabled + -2147180285, // Access denied + }; + + if (authErrorCodes.Contains(fault.ErrorCode)) + { + return true; + } + + // Check message for auth-related keywords + var message = fault.Message?.ToLowerInvariant() ?? ""; + if (message.Contains("authentication") || + message.Contains("authorization") || + message.Contains("token") || + message.Contains("expired") || + message.Contains("credential")) + { + return true; + } + } + + // Check for HTTP 401/403 in inner exceptions + if (exception.InnerException is HttpRequestException httpEx) + { + var message = httpEx.Message?.ToLowerInvariant() ?? ""; + if (message.Contains("401") || message.Contains("403") || + message.Contains("unauthorized") || message.Contains("forbidden")) + { + return true; + } + } + + return false; + } + + /// + /// Checks if an exception indicates a connection/network failure. + /// + /// The exception to check. + /// True if this is a connection failure. + private static bool IsConnectionFailure(Exception exception) + { + return exception is HttpRequestException || + exception is SocketException || + exception is DataverseConnectionException || + exception.InnerException is SocketException || + exception.InnerException is HttpRequestException; + } + + /// + /// Extracts the connection name from an exception when the client is null. + /// This handles the case where connection creation itself failed. + /// + /// The exception to extract from. + /// The fallback value if connection name cannot be extracted. + /// The connection name or fallback. 
+ private static string GetConnectionNameFromException(Exception exception, string fallback) + { + if (exception is DataverseConnectionException dce && !string.IsNullOrEmpty(dce.ConnectionName)) + { + return dce.ConnectionName; + } + + return fallback; + } + + /// + /// Checks if an exception is a TVP race condition error that occurs on newly created tables. + /// This happens when parallel bulk operations hit a table before Dataverse has created + /// the internal TVP types and stored procedures. + /// SQL Error 3732: Cannot drop type because it is being referenced by another object. + /// + /// The exception to check. + /// True if this is a TVP race condition error. + private static bool IsTvpRaceConditionError(Exception exception) + { + if (exception is not FaultException faultEx) + { + return false; + } + + var fault = faultEx.Detail; + + // Check for the generic SQL error wrapper code + if (fault.ErrorCode != SqlErrorCode) + { + return false; + } + + // Check the message for the specific SQL error 3732 (Cannot drop type) + var message = fault.Message ?? string.Empty; + return message.Contains("3732") || message.Contains("Cannot drop type"); + } + + /// + /// Checks if an exception is a SQL deadlock error. + /// SQL Error 1205: Transaction was deadlocked on resources with another process and has been chosen as the deadlock victim. + /// These are transient errors that occur under high concurrency and should be retried. + /// + /// The exception to check. + /// True if this is a deadlock error. + private static bool IsDeadlockError(Exception exception) + { + if (exception is not FaultException faultEx) + { + return false; + } + + var fault = faultEx.Detail; + + // Check for the generic SQL error wrapper code (same as TVP) + if (fault.ErrorCode != SqlErrorCode) + { + return false; + } + + // Check the message for SQL error 1205 (deadlock) + var message = fault.Message ?? 
string.Empty; + return message.Contains("1205") || message.Contains("deadlock", StringComparison.OrdinalIgnoreCase); + } + + /// + /// Logs a throttle error. The pool handles retry timing via GetClientAsync. + /// PooledClient automatically records the throttle via callback. + /// + /// The name of the connection that was throttled. + /// The Retry-After duration. + /// The service protection error code. + private void LogThrottle(string connectionName, TimeSpan retryAfter, int errorCode) + { + // Note: PooledClient already recorded this throttle via callback. + // We just log for visibility - pool handles waiting via GetClientAsync. + _logger.LogWarning( + "Service protection limit hit. Connection: {Connection}, ErrorCode: {ErrorCode}, " + + "RetryAfter: {RetryAfter}. Pool will wait for non-throttled connection.", + connectionName, errorCode, retryAfter); } - private async Task ExecuteCreateMultipleBatchAsync( + /// + /// Executes a batch operation with throttle detection, connection health management, and intelligent retry. + /// Service protection errors retry indefinitely - the pool handles waiting for non-throttled connections. + /// On auth/connection failure, marks the connection as invalid and retries with a new connection. + /// + private async Task ExecuteBatchWithThrottleHandlingAsync( + string operationName, + string entityLogicalName, + List batch, + BulkOperationOptions options, + Func, CancellationToken, Task> executeBatch, + CancellationToken cancellationToken) + { + var attempt = 0; + var maxRetries = _options.Pool.MaxConnectionRetries; + + // Loop indefinitely for service protection errors - only CancellationToken stops us. + // Other transient errors (auth, connection, TVP, deadlock) have finite retry limits. + while (true) + { + attempt++; + IPooledClient? 
client = null; + string connectionName = "unknown"; + + try + { + client = await GetClientWithRetryAsync(cancellationToken); + connectionName = client.ConnectionName; + + return await executeBatch(client, batch, cancellationToken); + } + catch (Exception ex) when (TryGetThrottleInfo(ex, out var retryAfter, out var errorCode)) + { + // Service protection is transient - always retry, never fail. + // PooledClient already recorded the throttle via callback. + // GetClientAsync will wait for a non-throttled connection. + LogThrottle(connectionName, retryAfter, errorCode); + + // Continue to next iteration - pool handles the waiting + } + catch (Exception ex) when (IsAuthFailure(ex)) + { + // Extract connection name from exception if client is null + var failedConnection = client?.ConnectionName + ?? GetConnectionNameFromException(ex, connectionName); + + _logger.LogWarning( + "Authentication failure on connection {Connection}. " + + "Marking invalid and retrying. Attempt: {Attempt}/{MaxRetries}. Error: {Error}", + failedConnection, attempt, maxRetries, ex.Message); + + // Mark connection as invalid - it won't be returned to pool + client?.MarkInvalid($"Auth failure: {ex.Message}"); + + // Record the failure for statistics + _connectionPool.RecordAuthFailure(); + + if (attempt >= maxRetries) + { + throw new DataverseConnectionException( + failedConnection, + $"Authentication failure after {attempt} attempts", + ex); + } + + // Continue to next iteration to retry with new connection + } + catch (Exception ex) when (IsConnectionFailure(ex)) + { + // Extract connection name from exception if client is null (connection creation failed) + var failedConnection = client?.ConnectionName + ?? GetConnectionNameFromException(ex, connectionName); + + _logger.LogWarning( + "Connection failure on {Connection}. " + + "Marking invalid and retrying. Attempt: {Attempt}/{MaxRetries}. 
Error: {Error}", + failedConnection, attempt, maxRetries, ex.Message); + + // Mark connection as invalid (only if we have a client instance) + client?.MarkInvalid($"Connection failure: {ex.Message}"); + + // Record the failure for statistics + _connectionPool.RecordConnectionFailure(); + + if (attempt >= maxRetries) + { + throw new DataverseConnectionException( + failedConnection, + $"Connection failure after {attempt} attempts", + ex); + } + + // Continue to next iteration to retry with new connection + } + catch (Exception ex) when (IsTvpRaceConditionError(ex)) + { + // Exponential backoff: 500ms, 1s, 2s + var delay = TimeSpan.FromMilliseconds(500 * Math.Pow(2, attempt - 1)); + + _logger.LogWarning( + "TVP race condition detected for {Entity}. " + + "This is transient on new tables. Retrying in {Delay}ms. Attempt: {Attempt}/{MaxTvpRetries}", + entityLogicalName, delay.TotalMilliseconds, attempt, MaxTvpRetries); + + if (attempt >= MaxTvpRetries) + { + _logger.LogError( + "TVP race condition persisted after {MaxRetries} retries for {Entity}. " + + "This may indicate a schema issue or concurrent schema modification.", + MaxTvpRetries, entityLogicalName); + throw; + } + + await Task.Delay(delay, cancellationToken); + + // Continue to next iteration to retry + } + catch (Exception ex) when (IsDeadlockError(ex)) + { + // Exponential backoff: 500ms, 1s, 2s + var delay = TimeSpan.FromMilliseconds(500 * Math.Pow(2, attempt - 1)); + + _logger.LogWarning( + "SQL deadlock detected for {Entity}. " + + "This is transient under high concurrency. Retrying in {Delay}ms. Attempt: {Attempt}/{MaxDeadlockRetries}", + entityLogicalName, delay.TotalMilliseconds, attempt, MaxDeadlockRetries); + + if (attempt >= MaxDeadlockRetries) + { + _logger.LogError( + "SQL deadlock persisted after {MaxRetries} retries for {Entity}. 
" + + "Consider reducing parallelism or batch size.", + MaxDeadlockRetries, entityLogicalName); + throw; + } + + await Task.Delay(delay, cancellationToken); + + // Continue to next iteration to retry + } + catch (Exception ex) when (ex is TaskCanceledException or OperationCanceledException) + { + // Cancellation is expected when Parallel.ForEachAsync cancels remaining operations + // after one batch fails. Don't log as error - just return a canceled result. + _logger.LogDebug( + "{Operation} batch canceled. Entity: {Entity}, BatchSize: {BatchSize}", + operationName, entityLogicalName, batch.Count); + + // Return empty result - the batch wasn't processed, not failed + return new BulkOperationResult + { + SuccessCount = 0, + FailureCount = 0, + Errors = Array.Empty(), + Duration = TimeSpan.Zero + }; + } + catch (Exception ex) + { + // Non-retryable error - convert to failure result + _logger.LogError(ex, "{Operation} batch failed with non-retryable error. Entity: {Entity}, BatchSize: {BatchSize}", + operationName, entityLogicalName, batch.Count); + + // Create appropriate failure result based on batch type + if (batch is List entityBatch) + { + return CreateFailureResultForEntities(entityBatch, ex); + } + else if (batch is List idBatch) + { + return CreateFailureResultForIds(idBatch, ex); + } + else + { + // Unknown batch type - rethrow + throw; + } + } + finally + { + if (client != null) + { + await client.DisposeAsync(); + } + } + } + + // Unreachable: loop only exits via return (success/failure result), + // throw (non-retryable error), or cancellation (throws OperationCanceledException) + } + + private Task ExecuteCreateMultipleBatchAsync( string entityLogicalName, List batch, BulkOperationOptions options, CancellationToken cancellationToken) { - await using var client = await _connectionPool.GetClientAsync(cancellationToken: cancellationToken); + return ExecuteBatchWithThrottleHandlingAsync( + "CreateMultiple", + entityLogicalName, + batch, + options, + (client, 
b, ct) => ExecuteCreateMultipleCoreAsync(client, entityLogicalName, b, options, ct), + cancellationToken); + } + + private async Task ExecuteCreateMultipleCoreAsync( + IPooledClient client, + string entityLogicalName, + List batch, + BulkOperationOptions options, + CancellationToken cancellationToken) + { + _logger.LogDebug("Executing CreateMultiple batch. Entity: {Entity}, BatchSize: {BatchSize}, Connection: {Connection}", + entityLogicalName, batch.Count, client.ConnectionName); var targets = new EntityCollection(batch) { EntityName = entityLogicalName }; var request = new CreateMultipleRequest { Targets = targets }; @@ -236,6 +867,9 @@ private async Task ExecuteCreateMultipleBatchAsync( { var response = (CreateMultipleResponse)await client.ExecuteAsync(request, cancellationToken); + _logger.LogDebug("CreateMultiple batch completed. Entity: {Entity}, Created: {Created}", + entityLogicalName, response.Ids.Length); + return new BulkOperationResult { SuccessCount = response.Ids.Length, @@ -247,7 +881,7 @@ private async Task ExecuteCreateMultipleBatchAsync( } catch (Exception ex) when (options.ElasticTable && TryExtractBulkApiErrors(ex, batch, out var errors, out var successCount)) { - // Elastic tables support partial success + // Elastic tables support partial success - this is expected behavior, not an error return new BulkOperationResult { SuccessCount = successCount, @@ -256,37 +890,33 @@ private async Task ExecuteCreateMultipleBatchAsync( Duration = TimeSpan.Zero }; } - catch (Exception ex) - { - // Standard tables: entire batch fails - _logger.LogError(ex, "CreateMultiple batch failed. Entity: {Entity}, BatchSize: {BatchSize}", - entityLogicalName, batch.Count); - - var errors = batch.Select((e, i) => new BulkOperationError - { - Index = i, - RecordId = e.Id != Guid.Empty ? 
e.Id : null, - ErrorCode = -1, - Message = ex.Message - }).ToList(); + // All other errors propagate to wrapper for retry or failure handling + } - return new BulkOperationResult - { - SuccessCount = 0, - FailureCount = batch.Count, - Errors = errors, - Duration = TimeSpan.Zero - }; - } + private Task ExecuteUpdateMultipleBatchAsync( + string entityLogicalName, + List batch, + BulkOperationOptions options, + CancellationToken cancellationToken) + { + return ExecuteBatchWithThrottleHandlingAsync( + "UpdateMultiple", + entityLogicalName, + batch, + options, + (client, b, ct) => ExecuteUpdateMultipleCoreAsync(client, entityLogicalName, b, options, ct), + cancellationToken); } - private async Task ExecuteUpdateMultipleBatchAsync( + private async Task ExecuteUpdateMultipleCoreAsync( + IPooledClient client, string entityLogicalName, List batch, BulkOperationOptions options, CancellationToken cancellationToken) { - await using var client = await _connectionPool.GetClientAsync(cancellationToken: cancellationToken); + _logger.LogDebug("Executing UpdateMultiple batch. Entity: {Entity}, BatchSize: {BatchSize}, Connection: {Connection}", + entityLogicalName, batch.Count, client.ConnectionName); var targets = new EntityCollection(batch) { EntityName = entityLogicalName }; var request = new UpdateMultipleRequest { Targets = targets }; @@ -297,6 +927,9 @@ private async Task ExecuteUpdateMultipleBatchAsync( { await client.ExecuteAsync(request, cancellationToken); + _logger.LogDebug("UpdateMultiple batch completed. 
Entity: {Entity}, Updated: {Updated}", + entityLogicalName, batch.Count); + return new BulkOperationResult { SuccessCount = batch.Count, @@ -307,6 +940,7 @@ private async Task ExecuteUpdateMultipleBatchAsync( } catch (Exception ex) when (options.ElasticTable && TryExtractBulkApiErrors(ex, batch, out var errors, out var successCount)) { + // Elastic tables support partial success - this is expected behavior, not an error return new BulkOperationResult { SuccessCount = successCount, @@ -315,36 +949,33 @@ private async Task ExecuteUpdateMultipleBatchAsync( Duration = TimeSpan.Zero }; } - catch (Exception ex) - { - _logger.LogError(ex, "UpdateMultiple batch failed. Entity: {Entity}, BatchSize: {BatchSize}", - entityLogicalName, batch.Count); - - var errors = batch.Select((e, i) => new BulkOperationError - { - Index = i, - RecordId = e.Id, - ErrorCode = -1, - Message = ex.Message - }).ToList(); + // All other errors propagate to wrapper for retry or failure handling + } - return new BulkOperationResult - { - SuccessCount = 0, - FailureCount = batch.Count, - Errors = errors, - Duration = TimeSpan.Zero - }; - } + private Task ExecuteUpsertMultipleBatchAsync( + string entityLogicalName, + List batch, + BulkOperationOptions options, + CancellationToken cancellationToken) + { + return ExecuteBatchWithThrottleHandlingAsync( + "UpsertMultiple", + entityLogicalName, + batch, + options, + (client, b, ct) => ExecuteUpsertMultipleCoreAsync(client, entityLogicalName, b, options, ct), + cancellationToken); } - private async Task ExecuteUpsertMultipleBatchAsync( + private async Task ExecuteUpsertMultipleCoreAsync( + IPooledClient client, string entityLogicalName, List batch, BulkOperationOptions options, CancellationToken cancellationToken) { - await using var client = await _connectionPool.GetClientAsync(cancellationToken: cancellationToken); + _logger.LogDebug("Executing UpsertMultiple batch. 
Entity: {Entity}, BatchSize: {BatchSize}, Connection: {Connection}", + entityLogicalName, batch.Count, client.ConnectionName); var targets = new EntityCollection(batch) { EntityName = entityLogicalName }; var request = new UpsertMultipleRequest { Targets = targets }; @@ -355,6 +986,9 @@ private async Task ExecuteUpsertMultipleBatchAsync( { var response = (UpsertMultipleResponse)await client.ExecuteAsync(request, cancellationToken); + _logger.LogDebug("UpsertMultiple batch completed. Entity: {Entity}, Success: {Success}", + entityLogicalName, batch.Count); + return new BulkOperationResult { SuccessCount = batch.Count, @@ -365,6 +999,7 @@ private async Task ExecuteUpsertMultipleBatchAsync( } catch (Exception ex) when (options.ElasticTable && TryExtractBulkApiErrors(ex, batch, out var errors, out var successCount)) { + // Elastic tables support partial success - this is expected behavior, not an error return new BulkOperationResult { SuccessCount = successCount, @@ -373,36 +1008,33 @@ private async Task ExecuteUpsertMultipleBatchAsync( Duration = TimeSpan.Zero }; } - catch (Exception ex) - { - _logger.LogError(ex, "UpsertMultiple batch failed. Entity: {Entity}, BatchSize: {BatchSize}", - entityLogicalName, batch.Count); - - var errors = batch.Select((e, i) => new BulkOperationError - { - Index = i, - RecordId = e.Id != Guid.Empty ? 
e.Id : null, - ErrorCode = -1, - Message = ex.Message - }).ToList(); + // All other errors propagate to wrapper for retry or failure handling + } - return new BulkOperationResult - { - SuccessCount = 0, - FailureCount = batch.Count, - Errors = errors, - Duration = TimeSpan.Zero - }; - } + private Task ExecuteElasticDeleteBatchAsync( + string entityLogicalName, + List batch, + BulkOperationOptions options, + CancellationToken cancellationToken) + { + return ExecuteBatchWithThrottleHandlingAsync( + "DeleteMultiple (elastic)", + entityLogicalName, + batch, + options, + (client, b, ct) => ExecuteElasticDeleteCoreAsync(client, entityLogicalName, b, options, ct), + cancellationToken); } - private async Task ExecuteElasticDeleteBatchAsync( + private async Task ExecuteElasticDeleteCoreAsync( + IPooledClient client, string entityLogicalName, List batch, BulkOperationOptions options, CancellationToken cancellationToken) { - await using var client = await _connectionPool.GetClientAsync(cancellationToken: cancellationToken); + _logger.LogDebug("Executing DeleteMultiple (elastic) batch. Entity: {Entity}, BatchSize: {BatchSize}, Connection: {Connection}", + entityLogicalName, batch.Count, client.ConnectionName); var entityReferences = batch .Select(id => new EntityReference(entityLogicalName, id)) @@ -429,6 +1061,7 @@ private async Task ExecuteElasticDeleteBatchAsync( } catch (Exception ex) when (TryExtractBulkApiErrorsForDelete(ex, batch, out var errors, out var successCount)) { + // DeleteMultiple supports partial success - this is expected behavior, not an error return new BulkOperationResult { SuccessCount = successCount, @@ -437,36 +1070,33 @@ private async Task ExecuteElasticDeleteBatchAsync( Duration = TimeSpan.Zero }; } - catch (Exception ex) - { - _logger.LogError(ex, "DeleteMultiple (elastic) batch failed. 
Entity: {Entity}, BatchSize: {BatchSize}", - entityLogicalName, batch.Count); - - var errors = batch.Select((id, i) => new BulkOperationError - { - Index = i, - RecordId = id, - ErrorCode = -1, - Message = ex.Message - }).ToList(); + // All other errors propagate to wrapper for retry or failure handling + } - return new BulkOperationResult - { - SuccessCount = 0, - FailureCount = batch.Count, - Errors = errors, - Duration = TimeSpan.Zero - }; - } + private Task ExecuteStandardDeleteBatchAsync( + string entityLogicalName, + List batch, + BulkOperationOptions options, + CancellationToken cancellationToken) + { + return ExecuteBatchWithThrottleHandlingAsync( + "DeleteMultiple (standard)", + entityLogicalName, + batch, + options, + (client, b, ct) => ExecuteStandardDeleteCoreAsync(client, entityLogicalName, b, options, ct), + cancellationToken); } - private async Task ExecuteStandardDeleteBatchAsync( + private async Task ExecuteStandardDeleteCoreAsync( + IPooledClient client, string entityLogicalName, List batch, BulkOperationOptions options, CancellationToken cancellationToken) { - await using var client = await _connectionPool.GetClientAsync(cancellationToken: cancellationToken); + _logger.LogDebug("Executing DeleteMultiple (standard) batch. Entity: {Entity}, BatchSize: {BatchSize}, Connection: {Connection}", + entityLogicalName, batch.Count, client.ConnectionName); var executeMultiple = new ExecuteMultipleRequest { @@ -489,6 +1119,8 @@ private async Task ExecuteStandardDeleteBatchAsync( executeMultiple.Requests.Add(deleteRequest); } + // Note: ExecuteMultiple can also throw service protection errors + // These will propagate to the wrapper for retry var response = (ExecuteMultipleResponse)await client.ExecuteAsync(executeMultiple, cancellationToken); var errors = new List(); @@ -662,6 +1294,109 @@ private bool TryExtractFromFaultForDelete( return false; } + /// + /// Executes batches in parallel with bounded concurrency. 
+ /// + private static async Task ExecuteBatchesParallelAsync( + List> batches, + Func, CancellationToken, Task> executeBatch, + int maxParallelism, + ProgressTracker tracker, + IProgress? progress, + CancellationToken cancellationToken) + { + var allErrors = new ConcurrentBag(); + var allCreatedIds = new ConcurrentBag(); + var successCount = 0; + var failureCount = 0; + + await Parallel.ForEachAsync( + batches, + new ParallelOptions + { + MaxDegreeOfParallelism = maxParallelism, + CancellationToken = cancellationToken + }, + async (batch, ct) => + { + // Use the combined cancellation token (ct) which includes Parallel.ForEachAsync's internal cancellation + var batchResult = await executeBatch(batch, ct).ConfigureAwait(false); + + Interlocked.Add(ref successCount, batchResult.SuccessCount); + Interlocked.Add(ref failureCount, batchResult.FailureCount); + + foreach (var error in batchResult.Errors) + { + allErrors.Add(error); + } + + if (batchResult.CreatedIds != null) + { + foreach (var id in batchResult.CreatedIds) + { + allCreatedIds.Add(id); + } + } + + // Report progress after each batch + tracker.RecordProgress(batchResult.SuccessCount, batchResult.FailureCount); + progress?.Report(tracker.GetSnapshot()); + }).ConfigureAwait(false); + + return new BulkOperationResult + { + SuccessCount = successCount, + FailureCount = failureCount, + Errors = allErrors.ToList(), + Duration = TimeSpan.Zero, + CreatedIds = allCreatedIds.Count > 0 ? allCreatedIds.ToList() : null + }; + } + + /// + /// Creates a failure result for a batch of entities that failed due to a non-retryable error. + /// + private static BulkOperationResult CreateFailureResultForEntities(List batch, Exception ex) + { + var errors = batch.Select((e, i) => new BulkOperationError + { + Index = i, + RecordId = e.Id != Guid.Empty ? 
e.Id : null, + ErrorCode = -1, + Message = ex.Message + }).ToList(); + + return new BulkOperationResult + { + SuccessCount = 0, + FailureCount = batch.Count, + Errors = errors, + Duration = TimeSpan.Zero + }; + } + + /// + /// Creates a failure result for a batch of IDs that failed due to a non-retryable error. + /// + private static BulkOperationResult CreateFailureResultForIds(List batch, Exception ex) + { + var errors = batch.Select((id, i) => new BulkOperationError + { + Index = i, + RecordId = id, + ErrorCode = -1, + Message = ex.Message + }).ToList(); + + return new BulkOperationResult + { + SuccessCount = 0, + FailureCount = batch.Count, + Errors = errors, + Duration = TimeSpan.Zero + }; + } + private static IEnumerable> Batch(IEnumerable source, int batchSize) { var batch = new List(batchSize); diff --git a/src/PPDS.Dataverse/BulkOperations/BulkOperationOptions.cs b/src/PPDS.Dataverse/BulkOperations/BulkOperationOptions.cs index b815c97b..1f493cf2 100644 --- a/src/PPDS.Dataverse/BulkOperations/BulkOperationOptions.cs +++ b/src/PPDS.Dataverse/BulkOperations/BulkOperationOptions.cs @@ -7,10 +7,10 @@ public class BulkOperationOptions { /// /// Gets or sets the number of records per batch. - /// Recommendation: 1000 for standard tables, 100 for elastic tables. - /// Default: 1000 (Dataverse maximum for standard tables) + /// Benchmarks show 100 is optimal for both standard and elastic tables. + /// Default: 100 /// - public int BatchSize { get; set; } = 1000; + public int BatchSize { get; set; } = 100; /// /// Gets or sets a value indicating whether the target is an elastic table (Cosmos DB-backed). @@ -79,5 +79,19 @@ public class BulkOperationOptions /// Default: false /// public bool SuppressDuplicateDetection { get; set; } = false; + + /// + /// Gets or sets the maximum number of batches to process in parallel. 
+ /// + /// When null (default), uses the ServiceClient's RecommendedDegreesOfParallelism + /// which comes from the x-ms-dop-hint response header from Dataverse. + /// + /// + /// Set to 1 for sequential processing, or a specific value to override + /// Microsoft's recommendation. + /// + /// Default: null (use RecommendedDegreesOfParallelism) + /// + public int? MaxParallelBatches { get; set; } = null; } } diff --git a/src/PPDS.Dataverse/BulkOperations/BulkOperationResult.cs b/src/PPDS.Dataverse/BulkOperations/BulkOperationResult.cs index 31d9aa40..9f9efe1a 100644 --- a/src/PPDS.Dataverse/BulkOperations/BulkOperationResult.cs +++ b/src/PPDS.Dataverse/BulkOperations/BulkOperationResult.cs @@ -6,7 +6,7 @@ namespace PPDS.Dataverse.BulkOperations /// /// Result of a bulk operation. /// - public class BulkOperationResult + public record BulkOperationResult { /// /// Gets the number of successful operations. diff --git a/src/PPDS.Dataverse/BulkOperations/IBulkOperationExecutor.cs b/src/PPDS.Dataverse/BulkOperations/IBulkOperationExecutor.cs index d4ce88ed..44f2140f 100644 --- a/src/PPDS.Dataverse/BulkOperations/IBulkOperationExecutor.cs +++ b/src/PPDS.Dataverse/BulkOperations/IBulkOperationExecutor.cs @@ -3,6 +3,7 @@ using System.Threading; using System.Threading.Tasks; using Microsoft.Xrm.Sdk; +using PPDS.Dataverse.Progress; namespace PPDS.Dataverse.BulkOperations { @@ -18,12 +19,14 @@ public interface IBulkOperationExecutor /// The entity logical name. /// The entities to create. /// Bulk operation options. + /// Optional progress reporter for tracking operation progress. /// Cancellation token. /// The result of the operation. Task CreateMultipleAsync( string entityLogicalName, IEnumerable entities, BulkOperationOptions? options = null, + IProgress? progress = null, CancellationToken cancellationToken = default); /// @@ -32,12 +35,14 @@ Task CreateMultipleAsync( /// The entity logical name. /// The entities to update. /// Bulk operation options. 
+ /// Optional progress reporter for tracking operation progress. /// Cancellation token. /// The result of the operation. Task UpdateMultipleAsync( string entityLogicalName, IEnumerable entities, BulkOperationOptions? options = null, + IProgress? progress = null, CancellationToken cancellationToken = default); /// @@ -46,12 +51,14 @@ Task UpdateMultipleAsync( /// The entity logical name. /// The entities to upsert. /// Bulk operation options. + /// Optional progress reporter for tracking operation progress. /// Cancellation token. /// The result of the operation. Task UpsertMultipleAsync( string entityLogicalName, IEnumerable entities, BulkOperationOptions? options = null, + IProgress? progress = null, CancellationToken cancellationToken = default); /// @@ -60,12 +67,14 @@ Task UpsertMultipleAsync( /// The entity logical name. /// The IDs of the records to delete. /// Bulk operation options. + /// Optional progress reporter for tracking operation progress. /// Cancellation token. /// The result of the operation. Task DeleteMultipleAsync( string entityLogicalName, IEnumerable ids, BulkOperationOptions? options = null, + IProgress? progress = null, CancellationToken cancellationToken = default); } } diff --git a/src/PPDS.Dataverse/PPDS.Dataverse.csproj b/src/PPDS.Dataverse/PPDS.Dataverse.csproj index e937fbd1..1ed70857 100644 --- a/src/PPDS.Dataverse/PPDS.Dataverse.csproj +++ b/src/PPDS.Dataverse/PPDS.Dataverse.csproj @@ -40,13 +40,16 @@ + - - - + + + + + diff --git a/src/PPDS.Dataverse/Pooling/ConnectionPoolOptions.cs b/src/PPDS.Dataverse/Pooling/ConnectionPoolOptions.cs index 1cd1ad4b..297b6816 100644 --- a/src/PPDS.Dataverse/Pooling/ConnectionPoolOptions.cs +++ b/src/PPDS.Dataverse/Pooling/ConnectionPoolOptions.cs @@ -39,9 +39,10 @@ public class ConnectionPoolOptions /// /// Gets or sets the maximum connection lifetime. - /// Default: 30 minutes + /// Set within OAuth token validity window for stable long-running scenarios. 
+ /// Default: 60 minutes /// - public TimeSpan MaxLifetime { get; set; } = TimeSpan.FromMinutes(30); + public TimeSpan MaxLifetime { get; set; } = TimeSpan.FromMinutes(60); /// /// Gets or sets a value indicating whether to disable the affinity cookie for load distribution. @@ -75,6 +76,21 @@ public class ConnectionPoolOptions /// Default: true /// public bool EnableValidation { get; set; } = true; + + /// + /// Gets or sets a value indicating whether to validate connection health on checkout. + /// When true, connections are checked for IsReady, age, and validity before being returned. + /// Default: true + /// + public bool ValidateOnCheckout { get; set; } = true; + + /// + /// Gets or sets the maximum number of retry attempts for auth/connection failures. + /// When a connection fails due to auth or connectivity issues, operations will retry + /// with a new connection up to this many times. + /// Default: 2 + /// + public int MaxConnectionRetries { get; set; } = 2; } /// diff --git a/src/PPDS.Dataverse/Pooling/DataverseConnectionPool.cs b/src/PPDS.Dataverse/Pooling/DataverseConnectionPool.cs index aa26552d..95dab7d8 100644 --- a/src/PPDS.Dataverse/Pooling/DataverseConnectionPool.cs +++ b/src/PPDS.Dataverse/Pooling/DataverseConnectionPool.cs @@ -3,11 +3,13 @@ using System.Collections.Generic; using System.Linq; using System.Net; +using System.ServiceModel; using System.Threading; using System.Threading.Tasks; using Microsoft.Extensions.Logging; using Microsoft.Extensions.Options; using Microsoft.PowerPlatform.Dataverse.Client; +using Microsoft.Xrm.Sdk; using PPDS.Dataverse.Client; using PPDS.Dataverse.DependencyInjection; using PPDS.Dataverse.Pooling.Strategies; @@ -30,12 +32,16 @@ public sealed class DataverseConnectionPool : IDataverseConnectionPool private readonly ConcurrentDictionary _activeConnections; private readonly ConcurrentDictionary _requestCounts; private readonly SemaphoreSlim _connectionSemaphore; + private readonly object _poolLock = new(); 
private readonly CancellationTokenSource _validationCts; private readonly Task _validationTask; private long _totalRequestsServed; - private bool _disposed; + private long _invalidConnectionCount; + private long _authFailureCount; + private long _connectionFailureCount; + private int _disposed; private static bool _performanceSettingsApplied; private static readonly object _performanceSettingsLock = new(); @@ -104,6 +110,7 @@ public DataverseConnectionPool( /// public async Task GetClientAsync( DataverseClientOptions? options = null, + string? excludeConnectionName = null, CancellationToken cancellationToken = default) { ThrowIfDisposed(); @@ -113,21 +120,46 @@ public async Task GetClientAsync( return CreateDirectClient(options); } - var acquired = await _connectionSemaphore.WaitAsync(_options.Pool.AcquireTimeout, cancellationToken); - if (!acquired) + // Loop until we get a connection + while (true) { - throw new TimeoutException( - $"Timed out waiting for a connection. Active: {GetTotalActiveConnections()}, MaxPoolSize: {_options.Pool.MaxPoolSize}"); - } + cancellationToken.ThrowIfCancellationRequested(); - try - { - return GetConnectionFromPool(options); - } - catch - { - _connectionSemaphore.Release(); - throw; + // Phase 1: Wait for non-throttled connection BEFORE acquiring semaphore + // This prevents holding semaphore slots while waiting for throttle to clear + await WaitForNonThrottledConnectionAsync(excludeConnectionName, cancellationToken); + + // Phase 2: Acquire semaphore + var acquired = await _connectionSemaphore.WaitAsync(_options.Pool.AcquireTimeout, cancellationToken); + if (!acquired) + { + throw new PoolExhaustedException( + GetTotalActiveConnections(), + _options.Pool.MaxPoolSize, + _options.Pool.AcquireTimeout); + } + + try + { + // Phase 3: Select connection and check throttle (quick, no waiting) + var connectionName = SelectConnection(excludeConnectionName); + + // Race check: throttle status could have changed while waiting for semaphore + 
if (_throttleTracker.IsThrottled(connectionName)) + { + // Connection became throttled - release semaphore and retry + _connectionSemaphore.Release(); + continue; + } + + // Phase 4: Get the actual connection from pool + return GetConnectionFromPoolCore(connectionName, options); + } + catch + { + _connectionSemaphore.Release(); + throw; + } } } @@ -144,8 +176,10 @@ public IPooledClient GetClient(DataverseClientOptions? options = null) var acquired = _connectionSemaphore.Wait(_options.Pool.AcquireTimeout); if (!acquired) { - throw new TimeoutException( - $"Timed out waiting for a connection. Active: {GetTotalActiveConnections()}, MaxPoolSize: {_options.Pool.MaxPoolSize}"); + throw new PoolExhaustedException( + GetTotalActiveConnections(), + _options.Pool.MaxPoolSize, + _options.Pool.AcquireTimeout); } try @@ -159,49 +193,99 @@ public IPooledClient GetClient(DataverseClientOptions? options = null) } } + /// + /// Waits until at least one connection is not throttled. + /// This method does NOT hold the semaphore, allowing other requests to also wait. + /// + private async Task WaitForNonThrottledConnectionAsync( + string? 
excludeConnectionName, + CancellationToken cancellationToken) + { + while (true) + { + cancellationToken.ThrowIfCancellationRequested(); + + // Check if any non-excluded connection is available (not throttled) + var hasAvailable = _options.Connections + .Where(c => string.IsNullOrEmpty(excludeConnectionName) || + !string.Equals(c.Name, excludeConnectionName, StringComparison.OrdinalIgnoreCase)) + .Any(c => !_throttleTracker.IsThrottled(c.Name)); + + if (hasAvailable) + { + return; // At least one connection is available + } + + // All connections are throttled - wait for shortest expiry + var waitTime = _throttleTracker.GetShortestExpiry(); + if (waitTime <= TimeSpan.Zero) + { + return; // Throttle already expired + } + + // Add a small buffer for timing + waitTime += TimeSpan.FromMilliseconds(100); + + _logger.LogInformation( + "All connections throttled. Waiting {WaitTime} for throttle to clear...", + waitTime); + + await Task.Delay(waitTime, cancellationToken); + + _logger.LogInformation("Throttle wait completed. Resuming operations."); + + // Loop back and check again + } + } + private PooledClient GetConnectionFromPool(DataverseClientOptions? options) { - var connectionName = SelectConnection(); + var connectionName = SelectConnection(excludeConnectionName: null); + return GetConnectionFromPoolCore(connectionName, options); + } + + private PooledClient GetConnectionFromPoolCore(string connectionName, DataverseClientOptions? options) + { var pool = _pools[connectionName]; - // Try to get from pool (bounded iteration, not recursion) - const int maxAttempts = 10; - for (int attempt = 0; attempt < maxAttempts; attempt++) + // Loop to find valid connection, draining any invalid ones + while (true) { - if (pool.TryDequeue(out var existingClient)) + PooledClient? 
existingClient = null; + lock (_poolLock) { - if (IsValidConnection(existingClient)) + if (pool.IsEmpty || !pool.TryDequeue(out existingClient)) { - _activeConnections.AddOrUpdate(connectionName, 1, (_, v) => v + 1); - Interlocked.Increment(ref _totalRequestsServed); - _requestCounts.AddOrUpdate(connectionName, 1, (_, v) => v + 1); - - existingClient.UpdateLastUsed(); - if (options != null) - { - existingClient.ApplyOptions(options); - } + break; // Pool empty, exit to create new connection + } + } - _logger.LogDebug( - "Retrieved connection from pool. ConnectionId: {ConnectionId}, Name: {ConnectionName}", - existingClient.ConnectionId, - connectionName); + if (IsValidConnection(existingClient)) + { + _activeConnections.AddOrUpdate(connectionName, 1, (_, v) => v + 1); + Interlocked.Increment(ref _totalRequestsServed); + _requestCounts.AddOrUpdate(connectionName, 1, (_, v) => v + 1); - return existingClient; + existingClient.UpdateLastUsed(); + if (options != null) + { + existingClient.ApplyOptions(options); } - // Invalid connection, dispose and try again - existingClient.ForceDispose(); - _logger.LogDebug("Disposed invalid connection. ConnectionId: {ConnectionId}", existingClient.ConnectionId); - } - else - { - // Pool is empty, break and create new - break; + _logger.LogDebug( + "Retrieved connection from pool. ConnectionId: {ConnectionId}, Name: {ConnectionName}", + existingClient.ConnectionId, + connectionName); + + return existingClient; } + + // Invalid connection, dispose and continue loop to try next + existingClient.ForceDispose(); + _logger.LogDebug("Disposed invalid connection. 
ConnectionId: {ConnectionId}", existingClient.ConnectionId); } - // Create new connection + // Pool is empty (or drained of invalid connections), create new connection var newClient = CreateNewConnection(connectionName); _activeConnections.AddOrUpdate(connectionName, 1, (_, v) => v + 1); Interlocked.Increment(ref _totalRequestsServed); @@ -215,12 +299,33 @@ private PooledClient GetConnectionFromPool(DataverseClientOptions? options) return newClient; } - private string SelectConnection() + private string SelectConnection(string? excludeConnectionName) { var connections = _options.Connections.AsReadOnly(); + + // If an exclusion is requested and we have multiple connections, filter + IReadOnlyList filteredConnections; + if (!string.IsNullOrEmpty(excludeConnectionName) && connections.Count > 1) + { + filteredConnections = connections + .Where(c => !string.Equals(c.Name, excludeConnectionName, StringComparison.OrdinalIgnoreCase)) + .ToList() + .AsReadOnly(); + + // If filtering would leave no connections, use all + if (filteredConnections.Count == 0) + { + filteredConnections = connections; + } + } + else + { + filteredConnections = connections; + } + var activeDict = _activeConnections.ToDictionary(kvp => kvp.Key, kvp => kvp.Value); - return _selectionStrategy.SelectConnection(connections, _throttleTracker, activeDict); + return _selectionStrategy.SelectConnection(filteredConnections, _throttleTracker, activeDict); } private PooledClient CreateNewConnection(string connectionName) @@ -264,8 +369,13 @@ private PooledClient CreateNewConnection(string connectionName) serviceClient.EnableAffinityCookie = false; } + // Disable SDK internal retry - we handle throttling ourselves for visibility + // Without this, ServiceClient silently waits on 429 and retries internally, + // giving no visibility into throttle events + serviceClient.MaxRetryCount = 0; + var client = new DataverseClient(serviceClient); - var pooledClient = new PooledClient(client, connectionName, 
ReturnConnection); + var pooledClient = new PooledClient(client, connectionName, ReturnConnection, OnThrottleDetected); _logger.LogDebug( "Created new connection. ConnectionId: {ConnectionId}, Name: {ConnectionName}, IsReady: {IsReady}", @@ -276,6 +386,14 @@ private PooledClient CreateNewConnection(string connectionName) return pooledClient; } + /// + /// Called by PooledClient when a throttle is detected. + /// + private void OnThrottleDetected(string connectionName, TimeSpan retryAfter) + { + _throttleTracker.RecordThrottle(connectionName, retryAfter); + } + private PooledClient CreateDirectClient(DataverseClientOptions? options) { // When pooling is disabled, create a direct connection @@ -294,7 +412,7 @@ private PooledClient CreateDirectClient(DataverseClientOptions? options) private void ReturnConnection(PooledClient client) { - if (_disposed) + if (Volatile.Read(ref _disposed) != 0) { client.ForceDispose(); return; @@ -302,8 +420,24 @@ private void ReturnConnection(PooledClient client) try { + // Decrement active connections counter first _activeConnections.AddOrUpdate(client.ConnectionName, 0, (_, v) => Math.Max(0, v - 1)); + // Check if connection was marked as invalid - dispose instead of returning to pool + if (client.IsInvalid) + { + _logger.LogInformation( + "Connection marked invalid, disposing instead of returning. 
" + + "ConnectionId: {ConnectionId}, Name: {ConnectionName}, Reason: {Reason}", + client.ConnectionId, + client.ConnectionName, + client.InvalidReason); + + Interlocked.Increment(ref _invalidConnectionCount); + client.ForceDispose(); + return; + } + var pool = _pools.GetValueOrDefault(client.ConnectionName); if (pool == null) { @@ -311,26 +445,30 @@ private void ReturnConnection(PooledClient client) return; } - // Reset client to original state + // Reset client to original state (this also resets the _returned flag for reuse) client.Reset(); client.UpdateLastUsed(); - // Check if pool is full - if (pool.Count < _options.Connections.First(c => c.Name == client.ConnectionName).MaxPoolSize) + // Lock around pool enqueue to synchronize with GetConnectionFromPool + lock (_poolLock) { - pool.Enqueue(client); - _logger.LogDebug( - "Returned connection to pool. ConnectionId: {ConnectionId}, Name: {ConnectionName}", - client.ConnectionId, - client.ConnectionName); - } - else - { - client.ForceDispose(); - _logger.LogDebug( - "Pool full, disposed connection. ConnectionId: {ConnectionId}, Name: {ConnectionName}", - client.ConnectionId, - client.ConnectionName); + // Check if pool is full + if (pool.Count < _options.Connections.First(c => c.Name == client.ConnectionName).MaxPoolSize) + { + pool.Enqueue(client); + _logger.LogDebug( + "Returned connection to pool. ConnectionId: {ConnectionId}, Name: {ConnectionName}", + client.ConnectionId, + client.ConnectionName); + } + else + { + client.ForceDispose(); + _logger.LogDebug( + "Pool full, disposed connection. ConnectionId: {ConnectionId}, Name: {ConnectionName}", + client.ConnectionId, + client.ConnectionName); + } } } finally @@ -350,6 +488,14 @@ private bool IsValidConnection(PooledClient client) { try { + // Check if marked as invalid + if (client.IsInvalid) + { + _logger.LogDebug("Connection marked invalid. 
ConnectionId: {ConnectionId}, Reason: {Reason}", + client.ConnectionId, client.InvalidReason); + return false; + } + // Check idle timeout if (DateTime.UtcNow - client.LastUsedAt > _options.Pool.MaxIdleTime) { @@ -426,9 +572,19 @@ private void InitializeMinimumConnections() foreach (var connection in _options.Connections) { var pool = _pools[connection.Name]; - var toCreate = Math.Min(_options.Pool.MinPoolSize, connection.MaxPoolSize); + var activeCount = _activeConnections.GetValueOrDefault(connection.Name, 0); + var currentTotal = pool.Count + activeCount; + var targetMin = Math.Min(_options.Pool.MinPoolSize, connection.MaxPoolSize); + var toCreate = Math.Max(0, targetMin - currentTotal); + + if (toCreate > 0) + { + _logger.LogDebug( + "Pool {ConnectionName}: Active={Active}, Idle={Idle}, Target={Target}, Creating={ToCreate}", + connection.Name, activeCount, pool.Count, targetMin, toCreate); + } - for (int i = 0; i < toCreate && pool.Count < toCreate; i++) + for (int i = 0; i < toCreate; i++) { try { @@ -520,6 +676,67 @@ private void ValidateOptions() throw new InvalidOperationException($"Connection string for '{connection.Name}' cannot be empty."); } } + + // Warn if multiple connections target different organizations + WarnIfMultipleOrganizations(); + } + + private void WarnIfMultipleOrganizations() + { + if (_options.Connections.Count < 2) + { + return; + } + + var orgUrls = new Dictionary(); // connectionName -> orgUrl + + foreach (var connection in _options.Connections) + { + var orgUrl = ExtractOrgUrl(connection.ConnectionString); + if (!string.IsNullOrEmpty(orgUrl)) + { + orgUrls[connection.Name] = orgUrl; + } + } + + var distinctOrgs = orgUrls.Values.Distinct(StringComparer.OrdinalIgnoreCase).ToList(); + + if (distinctOrgs.Count > 1) + { + _logger.LogWarning( + "Connection pool contains connections to {OrgCount} different organizations: {Orgs}. " + + "Requests will be load-balanced across these organizations, which is likely unintended. 
" + + "For multi-environment scenarios (Dev/QA/Prod), create separate service providers per environment. " + + "See documentation for the recommended pattern.", + distinctOrgs.Count, + string.Join(", ", distinctOrgs)); + } + } + + private static string? ExtractOrgUrl(string connectionString) + { + // Parse connection string to extract Url parameter + // Format: "AuthType=...;Url=https://org.crm.dynamics.com;..." + if (string.IsNullOrEmpty(connectionString)) + { + return null; + } + + var url = connectionString.Split(';', StringSplitOptions.RemoveEmptyEntries) + .Select(part => part.Split('=', 2)) + .Where(kv => kv.Length == 2 && kv[0].Trim().Equals("Url", StringComparison.OrdinalIgnoreCase)) + .Select(kv => kv[1].Trim()) + .FirstOrDefault(); + + if (url == null) + { + return null; + } + + // Extract just the host for comparison + return Uri.TryCreate(url, UriKind.Absolute, out var uri) + ? uri.Host.ToLowerInvariant() + : url.ToLowerInvariant(); } private PoolStatistics GetStatistics() @@ -547,6 +764,9 @@ private PoolStatistics GetStatistics() ThrottledConnections = connectionStats.Values.Count(s => s.IsThrottled), RequestsServed = _totalRequestsServed, ThrottleEvents = _throttleTracker.TotalThrottleEvents, + InvalidConnections = Interlocked.Read(ref _invalidConnectionCount), + AuthFailures = Interlocked.Read(ref _authFailureCount), + ConnectionFailures = Interlocked.Read(ref _connectionFailureCount), ConnectionStats = connectionStats }; } @@ -557,23 +777,65 @@ private PoolStatistics GetStatistics() private int GetTotalIdleConnections() => _pools.Values.Sum(p => p.Count); + /// + public void RecordAuthFailure() + { + Interlocked.Increment(ref _authFailureCount); + } + + /// + public void RecordConnectionFailure() + { + Interlocked.Increment(ref _connectionFailureCount); + } + private void ThrowIfDisposed() { - if (_disposed) + if (Volatile.Read(ref _disposed) != 0) { throw new ObjectDisposedException(nameof(DataverseConnectionPool)); } } + /// + public async Task 
ExecuteAsync(OrganizationRequest request, CancellationToken cancellationToken = default) + { + ThrowIfDisposed(); + + // Retry forever on service protection errors - only CancellationToken stops us + while (true) + { + cancellationToken.ThrowIfCancellationRequested(); + + await using var client = await GetClientAsync(cancellationToken: cancellationToken); + + try + { + return await client.ExecuteAsync(request, cancellationToken); + } + catch (FaultException faultEx) + when (ServiceProtectionException.IsServiceProtectionError(faultEx.Detail.ErrorCode)) + { + // Throttle was already recorded by PooledClient via callback. + // Log and retry - GetClientAsync will wait for non-throttled connection. + _logger.LogDebug( + "Request throttled on connection {Connection}. Will retry with next available connection.", + client.ConnectionName); + + // Loop continues - GetClientAsync will wait for a non-throttled connection + } + } + } + /// public void Dispose() { - if (_disposed) + // Use Interlocked.Exchange for atomic disposal check + if (Interlocked.Exchange(ref _disposed, 1) != 0) { return; } - _disposed = true; _validationCts.Cancel(); foreach (var pool in _pools.Values) @@ -591,12 +853,12 @@ public void Dispose() /// public async ValueTask DisposeAsync() { - if (_disposed) + // Use Interlocked.Exchange for atomic disposal check + if (Interlocked.Exchange(ref _disposed, 1) != 0) { return; } - _disposed = true; _validationCts.Cancel(); try diff --git a/src/PPDS.Dataverse/Pooling/IDataverseConnectionPool.cs b/src/PPDS.Dataverse/Pooling/IDataverseConnectionPool.cs index 5dffa2a6..2a6deeee 100644 --- a/src/PPDS.Dataverse/Pooling/IDataverseConnectionPool.cs +++ b/src/PPDS.Dataverse/Pooling/IDataverseConnectionPool.cs @@ -1,6 +1,7 @@ using System; using System.Threading; using System.Threading.Tasks; +using Microsoft.Xrm.Sdk; using PPDS.Dataverse.Client; namespace PPDS.Dataverse.Pooling @@ -15,12 +16,14 @@ public interface IDataverseConnectionPool : IAsyncDisposable, IDisposable 
/// Gets a client from the pool asynchronously. /// /// Optional per-request options (CallerId, etc.) + /// Optional connection name to exclude from selection (useful for retry after throttle). /// Cancellation token. /// A pooled client that returns to pool on dispose. /// Thrown when no connection is available within the timeout period. /// Thrown when the pool is not enabled or has been disposed. Task GetClientAsync( DataverseClientOptions? options = null, + string? excludeConnectionName = null, CancellationToken cancellationToken = default); /// @@ -41,5 +44,33 @@ Task GetClientAsync( /// Gets a value indicating whether the pool is enabled. /// bool IsEnabled { get; } + + /// + /// Records an authentication failure for statistics. + /// + void RecordAuthFailure(); + + /// + /// Records a connection failure for statistics. + /// + void RecordConnectionFailure(); + + /// + /// Executes a request with automatic retry on service protection errors. + /// This is a convenience method that handles connection management and throttle retry internally. + /// The caller doesn't need to handle service protection exceptions - they are handled transparently. + /// + /// The organization request to execute. + /// Cancellation token. + /// The organization response. + /// + /// This method will: + /// 1. Get a healthy (non-throttled) connection from the pool + /// 2. Execute the request + /// 3. If throttled, automatically wait and retry with a different connection + /// 4. Return the successful response + /// Service protection errors never escape this method - it retries until success or cancellation. 
+ /// + Task ExecuteAsync(OrganizationRequest request, CancellationToken cancellationToken = default); } } diff --git a/src/PPDS.Dataverse/Pooling/IPooledClient.cs b/src/PPDS.Dataverse/Pooling/IPooledClient.cs index d8ffd70d..1560b792 100644 --- a/src/PPDS.Dataverse/Pooling/IPooledClient.cs +++ b/src/PPDS.Dataverse/Pooling/IPooledClient.cs @@ -41,5 +41,23 @@ public interface IPooledClient : IDataverseClient, IAsyncDisposable, IDisposable /// Gets when this connection was last used. /// DateTime LastUsedAt { get; } + + /// + /// Gets whether this connection has been marked as invalid. + /// Invalid connections will be disposed instead of returned to the pool. + /// + bool IsInvalid { get; } + + /// + /// Gets the reason the connection was marked invalid, if any. + /// + string? InvalidReason { get; } + + /// + /// Marks this connection as invalid. It will be disposed on return instead of being pooled. + /// Call this when an unrecoverable error occurs (auth failure, connection failure, etc.). + /// + /// The reason for invalidation (for logging/diagnostics). + void MarkInvalid(string reason); } } diff --git a/src/PPDS.Dataverse/Pooling/PoolExhaustedException.cs b/src/PPDS.Dataverse/Pooling/PoolExhaustedException.cs new file mode 100644 index 00000000..30f021b6 --- /dev/null +++ b/src/PPDS.Dataverse/Pooling/PoolExhaustedException.cs @@ -0,0 +1,73 @@ +using System; + +namespace PPDS.Dataverse.Pooling +{ + /// + /// Exception thrown when the connection pool is exhausted and no connection + /// could be acquired within the configured timeout period. + /// + /// + /// This exception indicates a transient condition where all connections are currently + /// in use. Callers can retry after a brief delay, as connections will become available + /// when other operations complete. + /// + public class PoolExhaustedException : TimeoutException + { + /// + /// Gets the number of active connections at the time of the exception. 
+ /// + public int ActiveConnections { get; } + + /// + /// Gets the maximum pool size configured. + /// + public int MaxPoolSize { get; } + + /// + /// Gets the acquire timeout that was exceeded. + /// + public TimeSpan AcquireTimeout { get; } + + /// + /// Initializes a new instance of the class. + /// + public PoolExhaustedException() + : base("Connection pool exhausted.") + { + } + + /// + /// Initializes a new instance of the class. + /// + /// The error message. + public PoolExhaustedException(string message) + : base(message) + { + } + + /// + /// Initializes a new instance of the class. + /// + /// The error message. + /// The inner exception. + public PoolExhaustedException(string message, Exception innerException) + : base(message, innerException) + { + } + + /// + /// Initializes a new instance of the class. + /// + /// The number of active connections. + /// The maximum pool size. + /// The acquire timeout that was exceeded. + public PoolExhaustedException(int activeConnections, int maxPoolSize, TimeSpan acquireTimeout) + : base($"Connection pool exhausted. Active: {activeConnections}, MaxPoolSize: {maxPoolSize}, Timeout: {acquireTimeout.TotalSeconds:F1}s. " + + "Consider increasing MaxPoolSize or reducing MaxParallelBatches.") + { + ActiveConnections = activeConnections; + MaxPoolSize = maxPoolSize; + AcquireTimeout = acquireTimeout; + } + } +} diff --git a/src/PPDS.Dataverse/Pooling/PoolStatistics.cs b/src/PPDS.Dataverse/Pooling/PoolStatistics.cs index 4511b929..9e4b6b17 100644 --- a/src/PPDS.Dataverse/Pooling/PoolStatistics.cs +++ b/src/PPDS.Dataverse/Pooling/PoolStatistics.cs @@ -37,6 +37,21 @@ public class PoolStatistics /// public long ThrottleEvents { get; init; } + /// + /// Gets the number of connections that were invalidated due to failures. + /// + public long InvalidConnections { get; init; } + + /// + /// Gets the number of authentication failures detected. 
+ /// + public long AuthFailures { get; init; } + + /// + /// Gets the number of connection failures detected. + /// + public long ConnectionFailures { get; init; } + /// /// Gets per-connection statistics. /// diff --git a/src/PPDS.Dataverse/Pooling/PooledClient.cs b/src/PPDS.Dataverse/Pooling/PooledClient.cs index 99ea63b5..2f5f1da5 100644 --- a/src/PPDS.Dataverse/Pooling/PooledClient.cs +++ b/src/PPDS.Dataverse/Pooling/PooledClient.cs @@ -1,24 +1,28 @@ using System; +using System.ServiceModel; using System.Threading; using System.Threading.Tasks; using Microsoft.Xrm.Sdk; using Microsoft.Xrm.Sdk.Query; using PPDS.Dataverse.Client; +using PPDS.Dataverse.Resilience; namespace PPDS.Dataverse.Pooling { /// /// A client wrapper that returns the connection to the pool on dispose. + /// Automatically detects and records throttle events. /// internal sealed class PooledClient : IPooledClient { private readonly IDataverseClient _client; private readonly Action _returnToPool; + private readonly Action? _onThrottle; private readonly Guid _originalCallerId; private readonly Guid? _originalCallerAADObjectId; private readonly int _originalMaxRetryCount; private readonly TimeSpan _originalRetryPauseTime; - private bool _disposed; + private int _returned; /// /// Initializes a new instance of the class. @@ -26,10 +30,16 @@ internal sealed class PooledClient : IPooledClient /// The underlying client. /// The name of the connection configuration. /// Action to call when returning to pool. - internal PooledClient(IDataverseClient client, string connectionName, Action returnToPool) + /// Optional callback when throttle is detected (connectionName, retryAfter). + internal PooledClient( + IDataverseClient client, + string connectionName, + Action returnToPool, + Action? onThrottle = null) { _client = client ?? throw new ArgumentNullException(nameof(client)); _returnToPool = returnToPool ?? 
throw new ArgumentNullException(nameof(returnToPool)); + _onThrottle = onThrottle; ConnectionName = connectionName ?? throw new ArgumentNullException(nameof(connectionName)); ConnectionId = Guid.NewGuid(); CreatedAt = DateTime.UtcNow; @@ -54,6 +64,19 @@ internal PooledClient(IDataverseClient client, string connectionName, Action public DateTime LastUsedAt { get; internal set; } + /// + public bool IsInvalid { get; private set; } + + /// + public string? InvalidReason { get; private set; } + + /// + public void MarkInvalid(string reason) + { + IsInvalid = true; + InvalidReason = reason; + } + /// public bool IsReady => _client.IsReady; @@ -118,7 +141,7 @@ internal void UpdateLastUsed() } /// - /// Resets the client to its original state. + /// Resets the client to its original state and marks it available for reuse. /// internal void Reset() { @@ -126,6 +149,13 @@ internal void Reset() _client.CallerAADObjectId = _originalCallerAADObjectId; _client.MaxRetryCount = _originalMaxRetryCount; _client.RetryPauseTime = _originalRetryPauseTime; + + // Reset the invalid state + IsInvalid = false; + InvalidReason = null; + + // Reset the returned flag so this client can be returned again on next use + Interlocked.Exchange(ref _returned, 0); } /// @@ -165,117 +195,237 @@ internal void ForceDispose() } } + #region Throttle Detection + + private static readonly TimeSpan FallbackRetryAfter = TimeSpan.FromSeconds(30); + + /// + /// Checks if an exception is a service protection error and extracts the RetryAfter. + /// + private bool TryHandleThrottle(Exception ex) + { + if (ex is not FaultException faultEx) + { + return false; + } + + var fault = faultEx.Detail; + if (!ServiceProtectionException.IsServiceProtectionError(fault.ErrorCode)) + { + return false; + } + + // Extract RetryAfter and notify the pool + var retryAfter = ExtractRetryAfter(fault); + _onThrottle?.Invoke(ConnectionName, retryAfter); + return true; + } + + /// + /// Extracts the Retry-After duration from a fault. 
+ /// + private static TimeSpan ExtractRetryAfter(OrganizationServiceFault fault) + { + if (fault.ErrorDetails != null && + fault.ErrorDetails.TryGetValue("Retry-After", out var retryAfterObj)) + { + return retryAfterObj switch + { + TimeSpan ts => ts, + int seconds => TimeSpan.FromSeconds(seconds), + double seconds => TimeSpan.FromSeconds(seconds), + _ => FallbackRetryAfter + }; + } + + return FallbackRetryAfter; + } + + /// + /// Wraps a synchronous operation with throttle detection. + /// + private T ExecuteWithThrottleDetection(Func operation) + { + try + { + return operation(); + } + catch (Exception ex) when (TryHandleThrottle(ex)) + { + throw; // Re-throw after recording throttle + } + } + + /// + /// Wraps a synchronous void operation with throttle detection. + /// + private void ExecuteWithThrottleDetection(Action operation) + { + try + { + operation(); + } + catch (Exception ex) when (TryHandleThrottle(ex)) + { + throw; // Re-throw after recording throttle + } + } + + /// + /// Wraps an async operation with throttle detection. + /// + private async Task ExecuteWithThrottleDetectionAsync(Func> operation) + { + try + { + return await operation().ConfigureAwait(false); + } + catch (Exception ex) when (TryHandleThrottle(ex)) + { + throw; // Re-throw after recording throttle + } + } + + /// + /// Wraps an async void operation with throttle detection. 
+ /// + private async Task ExecuteWithThrottleDetectionAsync(Func operation) + { + try + { + await operation().ConfigureAwait(false); + } + catch (Exception ex) when (TryHandleThrottle(ex)) + { + throw; // Re-throw after recording throttle + } + } + + #endregion + #region IOrganizationService Implementation /// - public Guid Create(Entity entity) => _client.Create(entity); + public Guid Create(Entity entity) => + ExecuteWithThrottleDetection(() => _client.Create(entity)); /// - public Entity Retrieve(string entityName, Guid id, ColumnSet columnSet) - => _client.Retrieve(entityName, id, columnSet); + public Entity Retrieve(string entityName, Guid id, ColumnSet columnSet) => + ExecuteWithThrottleDetection(() => _client.Retrieve(entityName, id, columnSet)); /// - public void Update(Entity entity) => _client.Update(entity); + public void Update(Entity entity) => + ExecuteWithThrottleDetection(() => _client.Update(entity)); /// - public void Delete(string entityName, Guid id) => _client.Delete(entityName, id); + public void Delete(string entityName, Guid id) => + ExecuteWithThrottleDetection(() => _client.Delete(entityName, id)); /// - public OrganizationResponse Execute(OrganizationRequest request) => _client.Execute(request); + public OrganizationResponse Execute(OrganizationRequest request) => + ExecuteWithThrottleDetection(() => _client.Execute(request)); /// - public void Associate(string entityName, Guid entityId, Relationship relationship, EntityReferenceCollection relatedEntities) - => _client.Associate(entityName, entityId, relationship, relatedEntities); + public void Associate(string entityName, Guid entityId, Relationship relationship, EntityReferenceCollection relatedEntities) => + ExecuteWithThrottleDetection(() => _client.Associate(entityName, entityId, relationship, relatedEntities)); /// - public void Disassociate(string entityName, Guid entityId, Relationship relationship, EntityReferenceCollection relatedEntities) - => _client.Disassociate(entityName, 
entityId, relationship, relatedEntities); + public void Disassociate(string entityName, Guid entityId, Relationship relationship, EntityReferenceCollection relatedEntities) => + ExecuteWithThrottleDetection(() => _client.Disassociate(entityName, entityId, relationship, relatedEntities)); /// - public EntityCollection RetrieveMultiple(QueryBase query) => _client.RetrieveMultiple(query); + public EntityCollection RetrieveMultiple(QueryBase query) => + ExecuteWithThrottleDetection(() => _client.RetrieveMultiple(query)); #endregion #region IOrganizationServiceAsync Implementation /// - public Task CreateAsync(Entity entity) => _client.CreateAsync(entity); + public Task CreateAsync(Entity entity) => + ExecuteWithThrottleDetectionAsync(() => _client.CreateAsync(entity)); /// - public Task RetrieveAsync(string entityName, Guid id, ColumnSet columnSet) - => _client.RetrieveAsync(entityName, id, columnSet); + public Task RetrieveAsync(string entityName, Guid id, ColumnSet columnSet) => + ExecuteWithThrottleDetectionAsync(() => _client.RetrieveAsync(entityName, id, columnSet)); /// - public Task UpdateAsync(Entity entity) => _client.UpdateAsync(entity); + public Task UpdateAsync(Entity entity) => + ExecuteWithThrottleDetectionAsync(() => _client.UpdateAsync(entity)); /// - public Task DeleteAsync(string entityName, Guid id) => _client.DeleteAsync(entityName, id); + public Task DeleteAsync(string entityName, Guid id) => + ExecuteWithThrottleDetectionAsync(() => _client.DeleteAsync(entityName, id)); /// - public Task ExecuteAsync(OrganizationRequest request) => _client.ExecuteAsync(request); + public Task ExecuteAsync(OrganizationRequest request) => + ExecuteWithThrottleDetectionAsync(() => _client.ExecuteAsync(request)); /// - public Task AssociateAsync(string entityName, Guid entityId, Relationship relationship, EntityReferenceCollection relatedEntities) - => _client.AssociateAsync(entityName, entityId, relationship, relatedEntities); + public Task AssociateAsync(string 
entityName, Guid entityId, Relationship relationship, EntityReferenceCollection relatedEntities) => + ExecuteWithThrottleDetectionAsync(() => _client.AssociateAsync(entityName, entityId, relationship, relatedEntities)); /// - public Task DisassociateAsync(string entityName, Guid entityId, Relationship relationship, EntityReferenceCollection relatedEntities) - => _client.DisassociateAsync(entityName, entityId, relationship, relatedEntities); + public Task DisassociateAsync(string entityName, Guid entityId, Relationship relationship, EntityReferenceCollection relatedEntities) => + ExecuteWithThrottleDetectionAsync(() => _client.DisassociateAsync(entityName, entityId, relationship, relatedEntities)); /// - public Task RetrieveMultipleAsync(QueryBase query) => _client.RetrieveMultipleAsync(query); + public Task RetrieveMultipleAsync(QueryBase query) => + ExecuteWithThrottleDetectionAsync(() => _client.RetrieveMultipleAsync(query)); #endregion #region IOrganizationServiceAsync2 Implementation /// - public Task CreateAsync(Entity entity, CancellationToken cancellationToken) - => _client.CreateAsync(entity, cancellationToken); + public Task CreateAsync(Entity entity, CancellationToken cancellationToken) => + ExecuteWithThrottleDetectionAsync(() => _client.CreateAsync(entity, cancellationToken)); /// - public Task CreateAndReturnAsync(Entity entity, CancellationToken cancellationToken) - => _client.CreateAndReturnAsync(entity, cancellationToken); + public Task CreateAndReturnAsync(Entity entity, CancellationToken cancellationToken) => + ExecuteWithThrottleDetectionAsync(() => _client.CreateAndReturnAsync(entity, cancellationToken)); /// - public Task RetrieveAsync(string entityName, Guid id, ColumnSet columnSet, CancellationToken cancellationToken) - => _client.RetrieveAsync(entityName, id, columnSet, cancellationToken); + public Task RetrieveAsync(string entityName, Guid id, ColumnSet columnSet, CancellationToken cancellationToken) => + 
ExecuteWithThrottleDetectionAsync(() => _client.RetrieveAsync(entityName, id, columnSet, cancellationToken)); /// - public Task UpdateAsync(Entity entity, CancellationToken cancellationToken) - => _client.UpdateAsync(entity, cancellationToken); + public Task UpdateAsync(Entity entity, CancellationToken cancellationToken) => + ExecuteWithThrottleDetectionAsync(() => _client.UpdateAsync(entity, cancellationToken)); /// - public Task DeleteAsync(string entityName, Guid id, CancellationToken cancellationToken) - => _client.DeleteAsync(entityName, id, cancellationToken); + public Task DeleteAsync(string entityName, Guid id, CancellationToken cancellationToken) => + ExecuteWithThrottleDetectionAsync(() => _client.DeleteAsync(entityName, id, cancellationToken)); /// - public Task ExecuteAsync(OrganizationRequest request, CancellationToken cancellationToken) - => _client.ExecuteAsync(request, cancellationToken); + public Task ExecuteAsync(OrganizationRequest request, CancellationToken cancellationToken) => + ExecuteWithThrottleDetectionAsync(() => _client.ExecuteAsync(request, cancellationToken)); /// - public Task AssociateAsync(string entityName, Guid entityId, Relationship relationship, EntityReferenceCollection relatedEntities, CancellationToken cancellationToken) - => _client.AssociateAsync(entityName, entityId, relationship, relatedEntities, cancellationToken); + public Task AssociateAsync(string entityName, Guid entityId, Relationship relationship, EntityReferenceCollection relatedEntities, CancellationToken cancellationToken) => + ExecuteWithThrottleDetectionAsync(() => _client.AssociateAsync(entityName, entityId, relationship, relatedEntities, cancellationToken)); /// - public Task DisassociateAsync(string entityName, Guid entityId, Relationship relationship, EntityReferenceCollection relatedEntities, CancellationToken cancellationToken) - => _client.DisassociateAsync(entityName, entityId, relationship, relatedEntities, cancellationToken); + public Task 
DisassociateAsync(string entityName, Guid entityId, Relationship relationship, EntityReferenceCollection relatedEntities, CancellationToken cancellationToken) => + ExecuteWithThrottleDetectionAsync(() => _client.DisassociateAsync(entityName, entityId, relationship, relatedEntities, cancellationToken)); /// - public Task RetrieveMultipleAsync(QueryBase query, CancellationToken cancellationToken) - => _client.RetrieveMultipleAsync(query, cancellationToken); + public Task RetrieveMultipleAsync(QueryBase query, CancellationToken cancellationToken) => + ExecuteWithThrottleDetectionAsync(() => _client.RetrieveMultipleAsync(query, cancellationToken)); #endregion /// public void Dispose() { - if (_disposed) + // Use Interlocked.Exchange to ensure we only return to pool once per checkout. + // This prevents double-release of the semaphore if Dispose is called multiple times. + // The flag is reset in Reset() when the connection is returned to the pool. + if (Interlocked.Exchange(ref _returned, 1) != 0) { return; } - _disposed = true; _returnToPool(this); } diff --git a/src/PPDS.Dataverse/Pooling/Strategies/ThrottleAwareStrategy.cs b/src/PPDS.Dataverse/Pooling/Strategies/ThrottleAwareStrategy.cs index 90c9daa6..7a5e5544 100644 --- a/src/PPDS.Dataverse/Pooling/Strategies/ThrottleAwareStrategy.cs +++ b/src/PPDS.Dataverse/Pooling/Strategies/ThrottleAwareStrategy.cs @@ -1,3 +1,4 @@ +using System; using System.Collections.Generic; using System.Linq; using System.Threading; @@ -7,7 +8,7 @@ namespace PPDS.Dataverse.Pooling.Strategies { /// /// Avoids throttled connections and falls back to round-robin among available connections. - /// If all connections are throttled, waits for the shortest throttle to expire. + /// If all connections are throttled, returns the connection with shortest remaining throttle time. 
/// public sealed class ThrottleAwareStrategy : IConnectionSelectionStrategy { @@ -21,7 +22,7 @@ public string SelectConnection( { if (connections.Count == 0) { - throw new System.InvalidOperationException("No connections available."); + throw new InvalidOperationException("No connections available."); } if (connections.Count == 1) @@ -29,16 +30,16 @@ public string SelectConnection( return connections[0].Name; } - // Filter to non-throttled connections + // Strictly filter out ALL throttled connections var availableConnections = connections .Where(c => !throttleTracker.IsThrottled(c.Name)) .ToList(); if (availableConnections.Count == 0) { - // All connections throttled - use the one with shortest remaining throttle - // For now, just return the first one and let the caller handle retry - return connections[0].Name; + // All connections are throttled - find the one with shortest remaining throttle time + // so the caller can wait for it to become available + return FindConnectionWithShortestThrottleExpiry(connections, throttleTracker); } if (availableConnections.Count == 1) @@ -46,9 +47,40 @@ public string SelectConnection( return availableConnections[0].Name; } - // Round-robin among available connections + // Round-robin among available (non-throttled) connections only var index = Interlocked.Increment(ref _counter) % availableConnections.Count; return availableConnections[index].Name; } + + /// + /// Finds the connection with the shortest remaining throttle expiry time. + /// + private static string FindConnectionWithShortestThrottleExpiry( + IReadOnlyList connections, + IThrottleTracker throttleTracker) + { + string? shortestConnection = null; + DateTime? 
shortestExpiry = null; + + foreach (var connection in connections) + { + var expiry = throttleTracker.GetThrottleExpiry(connection.Name); + + if (expiry == null) + { + // This connection is no longer throttled (expired between checks) - use it + return connection.Name; + } + + if (shortestExpiry == null || expiry < shortestExpiry) + { + shortestExpiry = expiry; + shortestConnection = connection.Name; + } + } + + // Return the connection with shortest expiry, or fall back to first if none found + return shortestConnection ?? connections[0].Name; + } } } diff --git a/src/PPDS.Dataverse/Progress/ProgressSnapshot.cs b/src/PPDS.Dataverse/Progress/ProgressSnapshot.cs new file mode 100644 index 00000000..bd3887a4 --- /dev/null +++ b/src/PPDS.Dataverse/Progress/ProgressSnapshot.cs @@ -0,0 +1,79 @@ +using System; + +namespace PPDS.Dataverse.Progress +{ + /// + /// Immutable snapshot of progress state at a point in time. + /// + public sealed class ProgressSnapshot + { + /// + /// Gets the number of successfully processed records. + /// + public long Succeeded { get; init; } + + /// + /// Gets the number of failed records. + /// + public long Failed { get; init; } + + /// + /// Gets the total number of processed records (succeeded + failed). + /// + public long Processed => Succeeded + Failed; + + /// + /// Gets the total number of records to process. + /// + public long Total { get; init; } + + /// + /// Gets the remaining records to process. + /// + public long Remaining => Math.Max(0, Total - Processed); + + /// + /// Gets the percentage complete (0-100). + /// + public double PercentComplete => Total > 0 ? (double)Processed / Total * 100 : 0; + + /// + /// Gets the elapsed time since tracking started. + /// + public TimeSpan Elapsed { get; init; } + + /// + /// Gets the processing rate (records per second) - total records divided by elapsed time. + /// Use this rate for display and throughput reporting. 
+ /// + public double RatePerSecond => OverallRatePerSecond; + + /// + /// Gets the overall processing rate (records per second) since start. + /// This is the stable rate used for ETA calculations. Same as . + /// + public double OverallRatePerSecond { get; init; } + + /// + /// Gets the instantaneous processing rate (records per second) based on a rolling window. + /// + /// Warning: This value can fluctuate wildly in batch operations when multiple + /// batches complete at once. For most display purposes, use instead. + /// + /// + public double InstantRatePerSecond { get; init; } + + /// + /// Gets the estimated time remaining based on overall rate. + /// Returns if rate is zero. + /// + public TimeSpan EstimatedRemaining { get; init; } + + /// + /// Gets the estimated completion time (UTC). + /// + public DateTime EstimatedCompletionUtc => EstimatedRemaining == TimeSpan.MaxValue + ? DateTime.MaxValue + : DateTime.UtcNow.Add(EstimatedRemaining); + } +} diff --git a/src/PPDS.Dataverse/Progress/ProgressTracker.cs b/src/PPDS.Dataverse/Progress/ProgressTracker.cs new file mode 100644 index 00000000..2ad29078 --- /dev/null +++ b/src/PPDS.Dataverse/Progress/ProgressTracker.cs @@ -0,0 +1,252 @@ +using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.Threading; + +namespace PPDS.Dataverse.Progress +{ + /// + /// Thread-safe progress tracker for bulk operations. + /// Provides accurate rate calculation with both overall and instantaneous rates. + /// + /// + /// + /// This class is designed for high-throughput scenarios with parallel batch processing. + /// All public methods are thread-safe. + /// + /// + /// Rate calculations: + /// + /// : Total records / total elapsed time. Stable, used for ETA. + /// : Based on a rolling window (default 30s). Reflects current performance. 
+ /// + /// + /// + /// + /// + /// var tracker = new ProgressTracker(totalRecords); + /// + /// foreach (var batch in batches) + /// { + /// var result = await ProcessBatchAsync(batch); + /// tracker.RecordProgress(result.SuccessCount, result.FailureCount); + /// + /// var snapshot = tracker.GetSnapshot(); + /// Console.WriteLine($"{snapshot.Processed}/{snapshot.Total} ({snapshot.PercentComplete:F1}%) " + + /// $"@ {snapshot.InstantRatePerSecond:F0}/s, ETA: {snapshot.EstimatedRemaining:mm\\:ss}"); + /// } + /// + /// + public sealed class ProgressTracker + { + private readonly Stopwatch _stopwatch; + private readonly long _totalCount; + private readonly TimeSpan _rollingWindowDuration; + private readonly object _samplesLock = new(); + private readonly Queue<(long ticks, long processed)> _samples; + + private long _succeeded; + private long _failed; + + /// + /// Initializes a new instance of the class. + /// + /// The total number of records to process. + /// + /// The duration of the rolling window for instantaneous rate calculation. + /// Default is 30 seconds. + /// + /// + /// is negative, or + /// is less than 1. + /// + public ProgressTracker(long totalCount, int rollingWindowSeconds = 30) + { + if (totalCount < 0) + throw new ArgumentOutOfRangeException(nameof(totalCount), "Total count cannot be negative."); + if (rollingWindowSeconds < 1) + throw new ArgumentOutOfRangeException(nameof(rollingWindowSeconds), "Rolling window must be at least 1 second."); + + _totalCount = totalCount; + _rollingWindowDuration = TimeSpan.FromSeconds(rollingWindowSeconds); + _samples = new Queue<(long, long)>(); + _stopwatch = Stopwatch.StartNew(); + + // Add initial sample at t=0 + lock (_samplesLock) + { + _samples.Enqueue((0, 0)); + } + } + + /// + /// Gets the total count of records to process. + /// + public long TotalCount => _totalCount; + + /// + /// Gets the current count of successfully processed records. 
+ /// + public long Succeeded => Interlocked.Read(ref _succeeded); + + /// + /// Gets the current count of failed records. + /// + public long Failed => Interlocked.Read(ref _failed); + + /// + /// Gets the current count of processed records (succeeded + failed). + /// + public long Processed => Succeeded + Failed; + + /// + /// Records progress for a batch of records. + /// This method is thread-safe and can be called from multiple threads. + /// + /// Number of records that succeeded in this batch. + /// Number of records that failed in this batch. Default is 0. + /// + /// or is negative. + /// + public void RecordProgress(int successCount, int failureCount = 0) + { + if (successCount < 0) + throw new ArgumentOutOfRangeException(nameof(successCount), "Success count cannot be negative."); + if (failureCount < 0) + throw new ArgumentOutOfRangeException(nameof(failureCount), "Failure count cannot be negative."); + + if (successCount > 0) + { + Interlocked.Add(ref _succeeded, successCount); + } + + if (failureCount > 0) + { + Interlocked.Add(ref _failed, failureCount); + } + + // Add sample for rolling window calculation + var currentTicks = _stopwatch.ElapsedTicks; + var processed = Processed; + + lock (_samplesLock) + { + _samples.Enqueue((currentTicks, processed)); + PruneSamples(currentTicks); + } + } + + /// + /// Gets a snapshot of the current progress state. + /// This method is thread-safe. + /// + /// An immutable snapshot of progress metrics. + public ProgressSnapshot GetSnapshot() + { + var elapsed = _stopwatch.Elapsed; + var succeeded = Interlocked.Read(ref _succeeded); + var failed = Interlocked.Read(ref _failed); + var processed = succeeded + failed; + var remaining = Math.Max(0, _totalCount - processed); + + // Calculate overall rate (stable) + var overallRate = elapsed.TotalSeconds > 0.1 + ? 
processed / elapsed.TotalSeconds + : 0; + + // Calculate instant rate from rolling window + var instantRate = CalculateInstantRate(processed); + + // Calculate ETA based on overall rate (more stable) + var eta = overallRate > 0.001 + ? TimeSpan.FromSeconds(remaining / overallRate) + : TimeSpan.MaxValue; + + // Cap ETA at a reasonable maximum (7 days) + if (eta > TimeSpan.FromDays(7)) + { + eta = TimeSpan.MaxValue; + } + + return new ProgressSnapshot + { + Succeeded = succeeded, + Failed = failed, + Total = _totalCount, + Elapsed = elapsed, + OverallRatePerSecond = overallRate, + InstantRatePerSecond = instantRate, + EstimatedRemaining = eta + }; + } + + /// + /// Resets the tracker to its initial state. + /// + public void Reset() + { + Interlocked.Exchange(ref _succeeded, 0); + Interlocked.Exchange(ref _failed, 0); + + lock (_samplesLock) + { + _samples.Clear(); + _samples.Enqueue((0, 0)); + } + + _stopwatch.Restart(); + } + + private double CalculateInstantRate(long currentProcessed) + { + lock (_samplesLock) + { + if (_samples.Count < 2) + { + // Not enough samples, fall back to overall rate + var elapsed = _stopwatch.Elapsed; + return elapsed.TotalSeconds > 0.1 ? currentProcessed / elapsed.TotalSeconds : 0; + } + + // Get oldest and newest samples in window + var oldest = _samples.Peek(); + var currentTicks = _stopwatch.ElapsedTicks; + + // Calculate time span of the window + var ticksPerSecond = Stopwatch.Frequency; + var windowTicks = currentTicks - oldest.ticks; + var windowSeconds = (double)windowTicks / ticksPerSecond; + + if (windowSeconds < 0.1) + { + // Window too small, fall back to overall rate + var elapsed = _stopwatch.Elapsed; + return elapsed.TotalSeconds > 0.1 ? 
currentProcessed / elapsed.TotalSeconds : 0; + } + + var recordsInWindow = currentProcessed - oldest.processed; + return recordsInWindow / windowSeconds; + } + } + + private void PruneSamples(long currentTicks) + { + // Remove samples older than the rolling window + var ticksPerSecond = Stopwatch.Frequency; + var windowTicks = (long)(_rollingWindowDuration.TotalSeconds * ticksPerSecond); + var cutoffTicks = currentTicks - windowTicks; + + while (_samples.Count > 1 && _samples.Peek().ticks < cutoffTicks) + { + _samples.Dequeue(); + } + + // Keep at least 2 samples for rate calculation, but limit memory + // Keep max 1000 samples (for very high-frequency updates) + while (_samples.Count > 1000) + { + _samples.Dequeue(); + } + } + } +} diff --git a/src/PPDS.Dataverse/README.md b/src/PPDS.Dataverse/README.md index 617b2a22..1d9c0745 100644 --- a/src/PPDS.Dataverse/README.md +++ b/src/PPDS.Dataverse/README.md @@ -134,6 +134,91 @@ services.AddDataverseConnectionPool(options => services.AddDataverseConnectionPool(configuration); ``` +## Multi-Environment Scenarios + +When working with multiple environments (Dev, QA, Prod), **do not put them in the same connection pool**. The pool is designed for load-balancing within a single organization, not for cross-environment operations. + +### Wrong: Multiple Orgs in One Pool + +```json +{ + "Dataverse": { + "Connections": [ + { "Name": "Dev", "ConnectionString": "Url=https://dev.crm.dynamics.com;..." }, + { "Name": "QA", "ConnectionString": "Url=https://qa.crm.dynamics.com;..." } + ] + } +} +``` + +This will load-balance requests randomly across Dev and QA, which is almost never intended. The SDK will log a warning if it detects this configuration. + +### Correct: Separate Providers per Environment + +Structure your configuration with separate environment sections: + +```json +{ + "Environments": { + "Dev": { + "ConnectionString": "AuthType=OAuth;Url=https://dev.crm.dynamics.com;..." 
+ }, + "QA": { + "ConnectionString": "AuthType=OAuth;Url=https://qa.crm.dynamics.com;..." + }, + "Prod": { + "ConnectionString": "AuthType=OAuth;Url=https://prod.crm.dynamics.com;..." + } + } +} +``` + +Then create separate service providers for each environment: + +```csharp +// Create separate providers per environment +await using var devProvider = CreateProvider(config["Environments:Dev:ConnectionString"]); +await using var qaProvider = CreateProvider(config["Environments:QA:ConnectionString"]); + +// Export from Dev +var devExporter = devProvider.GetRequiredService(); +await devExporter.ExportAsync(schema, "data.zip", options); + +// Import to QA +var qaImporter = qaProvider.GetRequiredService(); +await qaImporter.ImportAsync("data.zip", importOptions); + +ServiceProvider CreateProvider(string connectionString) +{ + var services = new ServiceCollection(); + services.AddDataverseConnectionPool(options => + { + options.Connections.Add(new DataverseConnection("Primary", connectionString)); + }); + // Add other services... + return services.BuildServiceProvider(); +} +``` + +### When to Use Multiple Connections in One Pool + +Multiple connections in a single pool are appropriate when: + +1. **Same organization, multiple Application Users** - Multiply your API quota by using multiple registered applications: + + ```json + { + "Dataverse": { + "Connections": [ + { "Name": "AppUser1", "ConnectionString": "Url=https://org.crm.dynamics.com;ClientId=app1;..." }, + { "Name": "AppUser2", "ConnectionString": "Url=https://org.crm.dynamics.com;ClientId=app2;..." } + ] + } + } + ``` + +2. **High-availability within one org** - Multiple connections to the same org for resilience. 
+ ## Impersonation Execute operations on behalf of another user: diff --git a/src/PPDS.Dataverse/Resilience/IThrottleTracker.cs b/src/PPDS.Dataverse/Resilience/IThrottleTracker.cs index 8600064d..375ba8ac 100644 --- a/src/PPDS.Dataverse/Resilience/IThrottleTracker.cs +++ b/src/PPDS.Dataverse/Resilience/IThrottleTracker.cs @@ -40,5 +40,22 @@ public interface IThrottleTracker /// Gets the total number of throttle events recorded. /// long TotalThrottleEvents { get; } + + /// + /// Gets the number of currently throttled connections. + /// + int ThrottledConnectionCount { get; } + + /// + /// Gets the names of all currently throttled connections. + /// + IReadOnlyCollection ThrottledConnections { get; } + + /// + /// Gets the shortest time until any throttled connection expires. + /// Returns if no connections are throttled. + /// + /// The shortest time until a throttle expires, or if none are throttled. + TimeSpan GetShortestExpiry(); } } diff --git a/src/PPDS.Dataverse/Resilience/ThrottleTracker.cs b/src/PPDS.Dataverse/Resilience/ThrottleTracker.cs index 882e54e1..5941fe0b 100644 --- a/src/PPDS.Dataverse/Resilience/ThrottleTracker.cs +++ b/src/PPDS.Dataverse/Resilience/ThrottleTracker.cs @@ -1,5 +1,7 @@ using System; using System.Collections.Concurrent; +using System.Collections.Generic; +using System.Linq; using System.Threading; using Microsoft.Extensions.Logging; @@ -27,6 +29,26 @@ public ThrottleTracker(ILogger logger) /// public long TotalThrottleEvents => _totalThrottleEvents; + /// + public int ThrottledConnectionCount + { + get + { + CleanupExpired(); + return _throttleStates.Count; + } + } + + /// + public IReadOnlyCollection ThrottledConnections + { + get + { + CleanupExpired(); + return _throttleStates.Keys.ToList().AsReadOnly(); + } + } + /// public void RecordThrottle(string connectionName, TimeSpan retryAfter) { @@ -112,5 +134,42 @@ public void ClearThrottle(string connectionName) _logger.LogInformation("Cleared throttle for connection: 
{ConnectionName}", connectionName); } } + + /// + public TimeSpan GetShortestExpiry() + { + CleanupExpired(); + + if (_throttleStates.IsEmpty) + { + return TimeSpan.Zero; + } + + var now = DateTime.UtcNow; + var shortest = _throttleStates.Values + .Select(s => s.ExpiresAt - now) + .Where(t => t > TimeSpan.Zero) + .DefaultIfEmpty(TimeSpan.Zero) + .Min(); + + return shortest; + } + + /// + /// Removes expired throttle states from the dictionary. + /// + private void CleanupExpired() + { + var now = DateTime.UtcNow; + var expired = _throttleStates + .Where(kvp => kvp.Value.ExpiresAt <= now) + .Select(kvp => kvp.Key) + .ToList(); + + foreach (var key in expired.Where(k => _throttleStates.TryRemove(k, out _))) + { + _logger.LogDebug("Throttle expired for connection: {ConnectionName}", key); + } + } } } diff --git a/src/PPDS.Migration.Cli/Commands/AnalyzeCommand.cs b/src/PPDS.Migration.Cli/Commands/AnalyzeCommand.cs index 0dbaf764..1ce8eb88 100644 --- a/src/PPDS.Migration.Cli/Commands/AnalyzeCommand.cs +++ b/src/PPDS.Migration.Cli/Commands/AnalyzeCommand.cs @@ -1,5 +1,10 @@ using System.CommandLine; using System.Text.Json; +using Microsoft.Extensions.DependencyInjection; +using PPDS.Migration.Analysis; +using PPDS.Migration.Cli.Infrastructure; +using PPDS.Migration.Formats; +using PPDS.Migration.Models; namespace PPDS.Migration.Cli.Commands; @@ -61,22 +66,19 @@ private static async Task ExecuteAsync( return ExitCodes.InvalidArguments; } - // TODO: Implement when PPDS.Migration is ready - // var analyzer = new SchemaAnalyzer(); - // var analysis = await analyzer.AnalyzeAsync(schema.FullName, cancellationToken); + // Create service provider for analysis (no Dataverse connection needed) + await using var serviceProvider = ServiceFactory.CreateAnalysisProvider(); + var schemaReader = serviceProvider.GetRequiredService(); + var graphBuilder = serviceProvider.GetRequiredService(); - // Placeholder analysis result - var analysis = new SchemaAnalysis - { - EntityCount = 0, - 
DependencyCount = 0, - CircularReferenceCount = 0, - Tiers = [], - DeferredFields = new Dictionary(), - ManyToManyRelationships = [] - }; + // Parse schema + var migrationSchema = await schemaReader.ReadAsync(schema.FullName, cancellationToken); + + // Build dependency graph + var graph = graphBuilder.Build(migrationSchema); - await Task.CompletedTask; // Placeholder for async operation + // Convert to analysis result + var analysis = BuildAnalysis(graph, migrationSchema); if (outputFormat == OutputFormat.Json) { @@ -105,6 +107,63 @@ private static async Task ExecuteAsync( } } + private static SchemaAnalysis BuildAnalysis(DependencyGraph graph, MigrationSchema schema) + { + // Build tier info + var tierInfos = new List(); + for (int i = 0; i < graph.Tiers.Count; i++) + { + var tierEntities = graph.Tiers[i]; + var hasCircular = graph.CircularReferences.Any(cr => + cr.Entities.Any(e => tierEntities.Contains(e))); + + tierInfos.Add(new TierInfo + { + Tier = i + 1, + Entities = tierEntities.ToArray(), + HasCircular = hasCircular + }); + } + + // Extract deferred fields from circular references + var deferredFields = new Dictionary(); + foreach (var circular in graph.CircularReferences) + { + foreach (var edge in circular.Edges) + { + if (!deferredFields.ContainsKey(edge.FromEntity)) + { + deferredFields[edge.FromEntity] = []; + } + + var existing = deferredFields[edge.FromEntity].ToList(); + if (!existing.Contains(edge.FieldName)) + { + existing.Add(edge.FieldName); + deferredFields[edge.FromEntity] = existing.ToArray(); + } + } + } + + // Extract M2M relationships from schema + var m2mRelationships = schema.Entities + .SelectMany(e => e.Relationships ?? []) + .Where(r => r.IsManyToMany) + .Select(r => r.IntersectEntity ?? 
r.Name) + .Distinct() + .ToArray(); + + return new SchemaAnalysis + { + EntityCount = graph.Entities.Count, + DependencyCount = graph.Dependencies.Count, + CircularReferenceCount = graph.CircularReferences.Count, + Tiers = tierInfos.ToArray(), + DeferredFields = deferredFields, + ManyToManyRelationships = m2mRelationships + }; + } + private static void WriteTextOutput(SchemaAnalysis analysis, string schemaPath) { Console.WriteLine("Schema Analysis"); @@ -114,14 +173,7 @@ private static void WriteTextOutput(SchemaAnalysis analysis, string schemaPath) if (analysis.EntityCount == 0) { - Console.WriteLine("Note: Analysis not yet implemented - waiting for PPDS.Migration"); - Console.WriteLine(); - Console.WriteLine("When implemented, this command will display:"); - Console.WriteLine(" - Entity count and dependency count"); - Console.WriteLine(" - Circular reference detection"); - Console.WriteLine(" - Import tier ordering"); - Console.WriteLine(" - Deferred fields for circular dependencies"); - Console.WriteLine(" - Many-to-many relationship mappings"); + Console.WriteLine("No entities found in schema."); return; } @@ -176,10 +228,7 @@ private static void WriteJsonOutput(SchemaAnalysis analysis) hasCircular = t.HasCircular }), deferredFields = analysis.DeferredFields, - manyToManyRelationships = analysis.ManyToManyRelationships, - note = analysis.EntityCount == 0 - ? 
"Analysis not yet implemented - waiting for PPDS.Migration" - : null + manyToManyRelationships = analysis.ManyToManyRelationships }; var options = new JsonSerializerOptions diff --git a/src/PPDS.Migration.Cli/Commands/ExportCommand.cs b/src/PPDS.Migration.Cli/Commands/ExportCommand.cs index d78d482c..42450f3f 100644 --- a/src/PPDS.Migration.Cli/Commands/ExportCommand.cs +++ b/src/PPDS.Migration.Cli/Commands/ExportCommand.cs @@ -1,4 +1,7 @@ using System.CommandLine; +using Microsoft.Extensions.DependencyInjection; +using PPDS.Migration.Cli.Infrastructure; +using PPDS.Migration.Export; namespace PPDS.Migration.Cli.Commands; @@ -127,39 +130,41 @@ private static async Task ExecuteAsync( return ExitCodes.InvalidArguments; } - ConsoleOutput.WriteProgress("analyzing", "Parsing schema...", json); - ConsoleOutput.WriteProgress("analyzing", "Building dependency graph...", json); - - // TODO: Implement when PPDS.Migration is ready - // var options = new ExportOptions - // { - // ConnectionString = connection, - // SchemaPath = schema.FullName, - // OutputPath = output.FullName, - // DegreeOfParallelism = parallel, - // PageSize = pageSize, - // IncludeFiles = includeFiles - // }; - // - // var exporter = new DataverseExporter(options); - // if (json) - // { - // exporter.Progress += (sender, e) => ConsoleOutput.WriteProgress("export", e.Entity, e.Current, e.Total, e.RecordsPerSecond); - // } - // await exporter.ExportAsync(cancellationToken); - - ConsoleOutput.WriteProgress("export", "Export not yet implemented - waiting for PPDS.Migration", json); - await Task.Delay(100, cancellationToken); // Placeholder + // Create service provider and get exporter + await using var serviceProvider = ServiceFactory.CreateProvider(connection); + var exporter = serviceProvider.GetRequiredService(); + var progressReporter = ServiceFactory.CreateProgressReporter(json); + + // Configure export options + var exportOptions = new ExportOptions + { + DegreeOfParallelism = parallel, + PageSize = 
pageSize, + ExportFiles = includeFiles + }; + + // Execute export + var result = await exporter.ExportAsync( + schema.FullName, + output.FullName, + exportOptions, + progressReporter, + cancellationToken); + + // Report completion + if (!result.Success) + { + ConsoleOutput.WriteError($"Export completed with {result.Errors.Count} error(s).", json); + return ExitCodes.Failure; + } if (!json) { Console.WriteLine(); Console.WriteLine("Export completed successfully."); Console.WriteLine($"Output: {output.FullName}"); - } - else - { - ConsoleOutput.WriteCompletion(TimeSpan.Zero, 0, 0, json); + Console.WriteLine($"Entities: {result.EntitiesExported}, Records: {result.RecordsExported:N0}"); + Console.WriteLine($"Duration: {result.Duration:hh\\:mm\\:ss}, Rate: {result.RecordsPerSecond:F1} rec/s"); } return ExitCodes.Success; diff --git a/src/PPDS.Migration.Cli/Commands/ImportCommand.cs b/src/PPDS.Migration.Cli/Commands/ImportCommand.cs index 9470d6b7..4dc49e3c 100644 --- a/src/PPDS.Migration.Cli/Commands/ImportCommand.cs +++ b/src/PPDS.Migration.Cli/Commands/ImportCommand.cs @@ -1,4 +1,9 @@ using System.CommandLine; +using Microsoft.Extensions.DependencyInjection; +using PPDS.Migration.Cli.Infrastructure; +using PPDS.Migration.Formats; +using PPDS.Migration.Import; +using PPDS.Migration.Models; namespace PPDS.Migration.Cli.Commands; @@ -45,6 +50,10 @@ public static Command Create() getDefaultValue: () => ImportMode.Upsert, description: "Import mode: Create, Update, or Upsert"); + var userMappingOption = new Option( + aliases: ["--user-mapping", "-u"], + description: "Path to user mapping XML file for remapping user references"); + var jsonOption = new Option( name: "--json", getDefaultValue: () => false, @@ -64,6 +73,7 @@ public static Command Create() bypassFlowsOption, continueOnErrorOption, modeOption, + userMappingOption, jsonOption, verboseOption }; @@ -77,9 +87,26 @@ public static Command Create() var bypassFlows = 
context.ParseResult.GetValueForOption(bypassFlowsOption); var continueOnError = context.ParseResult.GetValueForOption(continueOnErrorOption); var mode = context.ParseResult.GetValueForOption(modeOption); + var userMappingFile = context.ParseResult.GetValueForOption(userMappingOption); var json = context.ParseResult.GetValueForOption(jsonOption); var verbose = context.ParseResult.GetValueForOption(verboseOption); + // Validate data file exists first (explicit argument) + if (!data.Exists) + { + ConsoleOutput.WriteError($"Data file not found: {data.FullName}", json); + context.ExitCode = ExitCodes.InvalidArguments; + return; + } + + // Validate user mapping file if specified + if (userMappingFile != null && !userMappingFile.Exists) + { + ConsoleOutput.WriteError($"User mapping file not found: {userMappingFile.FullName}", json); + context.ExitCode = ExitCodes.InvalidArguments; + return; + } + // Resolve connection string from argument or environment variable string connection; try @@ -98,7 +125,7 @@ public static Command Create() context.ExitCode = await ExecuteAsync( connection, data, batchSize, bypassPlugins, bypassFlows, - continueOnError, mode, json, verbose, context.GetCancellationToken()); + continueOnError, mode, userMappingFile, json, verbose, context.GetCancellationToken()); }); return command; @@ -112,52 +139,67 @@ private static async Task ExecuteAsync( bool bypassFlows, bool continueOnError, ImportMode mode, + FileInfo? userMappingFile, bool json, bool verbose, CancellationToken cancellationToken) { try { - // Validate data file exists - if (!data.Exists) + // Create service provider and get importer + await using var serviceProvider = ServiceFactory.CreateProvider(connection); + var importer = serviceProvider.GetRequiredService(); + var progressReporter = ServiceFactory.CreateProgressReporter(json); + + // Load user mappings if provided + UserMappingCollection? 
userMappings = null; + if (userMappingFile != null) { - ConsoleOutput.WriteError($"Data file not found: {data.FullName}", json); - return ExitCodes.InvalidArguments; + if (!json) + { + Console.WriteLine($"Loading user mappings from {userMappingFile.FullName}..."); + } + + var mappingReader = new UserMappingReader(); + userMappings = await mappingReader.ReadAsync(userMappingFile.FullName, cancellationToken); + + if (!json) + { + Console.WriteLine($"Loaded {userMappings.Mappings.Count} user mapping(s)."); + } } - ConsoleOutput.WriteProgress("analyzing", "Reading data archive...", json); - ConsoleOutput.WriteProgress("analyzing", "Building dependency graph...", json); - - // TODO: Implement when PPDS.Migration is ready - // var options = new ImportOptions - // { - // ConnectionString = connection, - // DataPath = data.FullName, - // BatchSize = batchSize, - // BypassPlugins = bypassPlugins, - // BypassFlows = bypassFlows, - // ContinueOnError = continueOnError, - // Mode = mode - // }; - // - // var importer = new DataverseImporter(options); - // if (json) - // { - // importer.Progress += (sender, e) => ConsoleOutput.WriteProgress("import", e.Entity, e.Current, e.Total, e.RecordsPerSecond); - // } - // var result = await importer.ImportAsync(cancellationToken); - - ConsoleOutput.WriteProgress("import", "Import not yet implemented - waiting for PPDS.Migration", json); - await Task.Delay(100, cancellationToken); // Placeholder + // Configure import options + var importOptions = new ImportOptions + { + BatchSize = batchSize, + BypassCustomPluginExecution = bypassPlugins, + BypassPowerAutomateFlows = bypassFlows, + ContinueOnError = continueOnError, + Mode = MapImportMode(mode), + UserMappings = userMappings + }; + + // Execute import + var result = await importer.ImportAsync( + data.FullName, + importOptions, + progressReporter, + cancellationToken); + + // Report completion + if (!result.Success) + { + ConsoleOutput.WriteError($"Import completed with 
{result.Errors.Count} error(s).", json); + return ExitCodes.Failure; + } if (!json) { Console.WriteLine(); Console.WriteLine("Import completed successfully."); - } - else - { - ConsoleOutput.WriteCompletion(TimeSpan.Zero, 0, 0, json); + Console.WriteLine($"Tiers: {result.TiersProcessed}, Records: {result.RecordsImported:N0}"); + Console.WriteLine($"Duration: {result.Duration:hh\\:mm\\:ss}, Rate: {result.RecordsPerSecond:F1} rec/s"); } return ExitCodes.Success; @@ -177,4 +219,15 @@ private static async Task ExecuteAsync( return ExitCodes.Failure; } } + + /// + /// Maps CLI ImportMode to Migration library ImportMode. + /// + private static PPDS.Migration.Import.ImportMode MapImportMode(ImportMode mode) => mode switch + { + ImportMode.Create => PPDS.Migration.Import.ImportMode.Create, + ImportMode.Update => PPDS.Migration.Import.ImportMode.Update, + ImportMode.Upsert => PPDS.Migration.Import.ImportMode.Upsert, + _ => PPDS.Migration.Import.ImportMode.Upsert + }; } diff --git a/src/PPDS.Migration.Cli/Commands/MigrateCommand.cs b/src/PPDS.Migration.Cli/Commands/MigrateCommand.cs index ebf355b2..c7f4c9ad 100644 --- a/src/PPDS.Migration.Cli/Commands/MigrateCommand.cs +++ b/src/PPDS.Migration.Cli/Commands/MigrateCommand.cs @@ -1,4 +1,9 @@ using System.CommandLine; +using Microsoft.Extensions.DependencyInjection; +using PPDS.Migration.Cli.Infrastructure; +using PPDS.Migration.Export; +using PPDS.Migration.Import; +using PPDS.Migration.Progress; namespace PPDS.Migration.Cli.Commands; @@ -142,46 +147,84 @@ private static async Task ExecuteAsync( // Create temp file path for intermediate data tempDataFile = Path.Combine(tempDirectory, $"ppds-migrate-{Guid.NewGuid():N}.zip"); - ConsoleOutput.WriteProgress("analyzing", "Parsing schema...", json); - ConsoleOutput.WriteProgress("analyzing", "Building dependency graph...", json); + // Create progress reporter + var progressReporter = ServiceFactory.CreateProgressReporter(json); - // TODO: Implement when PPDS.Migration is ready // 
Phase 1: Export from source - // ConsoleOutput.WriteProgress("export", "Connecting to source environment...", json); - // var exportOptions = new ExportOptions - // { - // ConnectionString = sourceConnection, - // SchemaPath = schema.FullName, - // OutputPath = tempDataFile - // }; - // var exporter = new DataverseExporter(exportOptions); - // await exporter.ExportAsync(cancellationToken); + if (!json) + { + Console.WriteLine("Phase 1: Exporting from source environment..."); + } + progressReporter.Report(new ProgressEventArgs + { + Phase = MigrationPhase.Analyzing, + Message = "Connecting to source environment..." + }); + + await using var sourceProvider = ServiceFactory.CreateProvider(sourceConnection, "Source"); + var exporter = sourceProvider.GetRequiredService(); + + var exportResult = await exporter.ExportAsync( + schema.FullName, + tempDataFile, + new ExportOptions(), + progressReporter, + cancellationToken); + + if (!exportResult.Success) + { + ConsoleOutput.WriteError($"Export failed with {exportResult.Errors.Count} error(s).", json); + return ExitCodes.Failure; + } // Phase 2: Import to target - // ConsoleOutput.WriteProgress("import", "Connecting to target environment...", json); - // var importOptions = new ImportOptions - // { - // ConnectionString = targetConnection, - // DataPath = tempDataFile, - // BatchSize = batchSize, - // BypassPlugins = bypassPlugins, - // BypassFlows = bypassFlows - // }; - // var importer = new DataverseImporter(importOptions); - // await importer.ImportAsync(cancellationToken); - - ConsoleOutput.WriteProgress("export", "Export phase not yet implemented - waiting for PPDS.Migration", json); - ConsoleOutput.WriteProgress("import", "Import phase not yet implemented - waiting for PPDS.Migration", json); - await Task.Delay(100, cancellationToken); // Placeholder + if (!json) + { + Console.WriteLine(); + Console.WriteLine("Phase 2: Importing to target environment..."); + } + progressReporter.Report(new ProgressEventArgs + { + 
Phase = MigrationPhase.Analyzing, + Message = "Connecting to target environment..." + }); + + await using var targetProvider = ServiceFactory.CreateProvider(targetConnection, "Target"); + var importer = targetProvider.GetRequiredService(); + + var importOptions = new ImportOptions + { + BatchSize = batchSize, + BypassCustomPluginExecution = bypassPlugins, + BypassPowerAutomateFlows = bypassFlows + }; + + var importResult = await importer.ImportAsync( + tempDataFile, + importOptions, + progressReporter, + cancellationToken); + + if (!importResult.Success) + { + ConsoleOutput.WriteError($"Import failed with {importResult.Errors.Count} error(s).", json); + return ExitCodes.Failure; + } + // Report completion if (!json) { Console.WriteLine(); Console.WriteLine("Migration completed successfully."); + Console.WriteLine($"Exported: {exportResult.RecordsExported:N0} records"); + Console.WriteLine($"Imported: {importResult.RecordsImported:N0} records"); + Console.WriteLine($"Total duration: {exportResult.Duration + importResult.Duration:hh\\:mm\\:ss}"); } else { - ConsoleOutput.WriteCompletion(TimeSpan.Zero, 0, 0, json); + var totalRecords = exportResult.RecordsExported; + var totalDuration = exportResult.Duration + importResult.Duration; + ConsoleOutput.WriteCompletion(totalDuration, totalRecords, 0, json); } return ExitCodes.Success; diff --git a/src/PPDS.Migration.Cli/Commands/SchemaCommand.cs b/src/PPDS.Migration.Cli/Commands/SchemaCommand.cs new file mode 100644 index 00000000..614320e7 --- /dev/null +++ b/src/PPDS.Migration.Cli/Commands/SchemaCommand.cs @@ -0,0 +1,407 @@ +using System.CommandLine; +using Microsoft.Extensions.DependencyInjection; +using PPDS.Migration.Cli.Infrastructure; +using PPDS.Migration.Formats; +using PPDS.Migration.Schema; + +namespace PPDS.Migration.Cli.Commands; + +/// +/// Schema generation and management commands. 
+/// +public static class SchemaCommand +{ + public static Command Create() + { + var command = new Command("schema", "Generate and manage migration schemas"); + + command.AddCommand(CreateGenerateCommand()); + command.AddCommand(CreateListCommand()); + + return command; + } + + private static Command CreateGenerateCommand() + { + var connectionOption = new Option( + aliases: ["--connection", "-c"], + description: ConnectionResolver.GetHelpDescription(ConnectionResolver.ConnectionEnvVar)); + + var entitiesOption = new Option( + aliases: ["--entities", "-e"], + description: "Entity logical names to include (comma-separated or multiple -e flags)") + { + IsRequired = true, + AllowMultipleArgumentsPerToken = true + }; + + var outputOption = new Option( + aliases: ["--output", "-o"], + description: "Output schema file path") + { + IsRequired = true + }; + + var includeSystemFieldsOption = new Option( + name: "--include-system-fields", + getDefaultValue: () => false, + description: "Include system fields (createdon, modifiedon, etc.)"); + + var includeRelationshipsOption = new Option( + name: "--include-relationships", + getDefaultValue: () => true, + description: "Include relationship definitions"); + + var disablePluginsOption = new Option( + name: "--disable-plugins", + getDefaultValue: () => false, + description: "Set disableplugins=true on all entities"); + + var includeAttributesOption = new Option( + aliases: ["--include-attributes", "-a"], + description: "Only include these attributes (whitelist, comma-separated or multiple flags)") + { + AllowMultipleArgumentsPerToken = true + }; + + var excludeAttributesOption = new Option( + name: "--exclude-attributes", + description: "Exclude these attributes (blacklist, comma-separated)") + { + AllowMultipleArgumentsPerToken = true + }; + + var excludePatternsOption = new Option( + name: "--exclude-patterns", + description: "Exclude attributes matching patterns (e.g., 'new_*', '*_base')") + { + 
AllowMultipleArgumentsPerToken = true + }; + + var jsonOption = new Option( + name: "--json", + getDefaultValue: () => false, + description: "Output progress as JSON"); + + var verboseOption = new Option( + aliases: ["--verbose", "-v"], + getDefaultValue: () => false, + description: "Verbose output"); + + var command = new Command("generate", "Generate a migration schema from Dataverse metadata") + { + connectionOption, + entitiesOption, + outputOption, + includeSystemFieldsOption, + includeRelationshipsOption, + disablePluginsOption, + includeAttributesOption, + excludeAttributesOption, + excludePatternsOption, + jsonOption, + verboseOption + }; + + command.SetHandler(async (context) => + { + var connectionArg = context.ParseResult.GetValueForOption(connectionOption); + var entities = context.ParseResult.GetValueForOption(entitiesOption)!; + var output = context.ParseResult.GetValueForOption(outputOption)!; + var includeSystemFields = context.ParseResult.GetValueForOption(includeSystemFieldsOption); + var includeRelationships = context.ParseResult.GetValueForOption(includeRelationshipsOption); + var disablePlugins = context.ParseResult.GetValueForOption(disablePluginsOption); + var includeAttributes = context.ParseResult.GetValueForOption(includeAttributesOption); + var excludeAttributes = context.ParseResult.GetValueForOption(excludeAttributesOption); + var excludePatterns = context.ParseResult.GetValueForOption(excludePatternsOption); + var json = context.ParseResult.GetValueForOption(jsonOption); + var verbose = context.ParseResult.GetValueForOption(verboseOption); + + // Resolve connection string + string connection; + try + { + connection = ConnectionResolver.Resolve( + connectionArg, + ConnectionResolver.ConnectionEnvVar, + "connection"); + } + catch (InvalidOperationException ex) + { + ConsoleOutput.WriteError(ex.Message, json); + context.ExitCode = ExitCodes.InvalidArguments; + return; + } + + // Parse entities (handle comma-separated and multiple flags) + 
var entityList = entities + .SelectMany(e => e.Split(',', StringSplitOptions.RemoveEmptyEntries)) + .Select(e => e.Trim()) + .Distinct(StringComparer.OrdinalIgnoreCase) + .ToList(); + + if (entityList.Count == 0) + { + ConsoleOutput.WriteError("No entities specified.", json); + context.ExitCode = ExitCodes.InvalidArguments; + return; + } + + // Parse attribute lists (handle comma-separated) + var includeAttrList = ParseAttributeList(includeAttributes); + var excludeAttrList = ParseAttributeList(excludeAttributes); + var excludePatternList = ParseAttributeList(excludePatterns); + + context.ExitCode = await ExecuteGenerateAsync( + connection, entityList, output, + includeSystemFields, includeRelationships, disablePlugins, + includeAttrList, excludeAttrList, excludePatternList, + json, verbose, context.GetCancellationToken()); + }); + + return command; + } + + private static Command CreateListCommand() + { + var connectionOption = new Option( + aliases: ["--connection", "-c"], + description: ConnectionResolver.GetHelpDescription(ConnectionResolver.ConnectionEnvVar)); + + var filterOption = new Option( + aliases: ["--filter", "-f"], + description: "Filter entities by name pattern (e.g., 'account*' or '*custom*')"); + + var customOnlyOption = new Option( + name: "--custom-only", + getDefaultValue: () => false, + description: "Show only custom entities"); + + var jsonOption = new Option( + name: "--json", + getDefaultValue: () => false, + description: "Output as JSON"); + + var command = new Command("list", "List available entities in Dataverse") + { + connectionOption, + filterOption, + customOnlyOption, + jsonOption + }; + + command.SetHandler(async (context) => + { + var connectionArg = context.ParseResult.GetValueForOption(connectionOption); + var filter = context.ParseResult.GetValueForOption(filterOption); + var customOnly = context.ParseResult.GetValueForOption(customOnlyOption); + var json = context.ParseResult.GetValueForOption(jsonOption); + + // Resolve 
connection string + string connection; + try + { + connection = ConnectionResolver.Resolve( + connectionArg, + ConnectionResolver.ConnectionEnvVar, + "connection"); + } + catch (InvalidOperationException ex) + { + ConsoleOutput.WriteError(ex.Message, json); + context.ExitCode = ExitCodes.InvalidArguments; + return; + } + + context.ExitCode = await ExecuteListAsync( + connection, filter, customOnly, json, context.GetCancellationToken()); + }); + + return command; + } + + private static List? ParseAttributeList(string[]? input) + { + if (input == null || input.Length == 0) + { + return null; + } + + return input + .SelectMany(a => a.Split(',', StringSplitOptions.RemoveEmptyEntries)) + .Select(a => a.Trim()) + .Distinct(StringComparer.OrdinalIgnoreCase) + .ToList(); + } + + private static async Task ExecuteGenerateAsync( + string connection, + List entities, + FileInfo output, + bool includeSystemFields, + bool includeRelationships, + bool disablePlugins, + List? includeAttributes, + List? excludeAttributes, + List? 
excludePatterns, + bool json, + bool verbose, + CancellationToken cancellationToken) + { + try + { + if (!json) + { + Console.WriteLine($"Generating schema for {entities.Count} entities..."); + if (includeAttributes != null) + { + Console.WriteLine($" Including only: {string.Join(", ", includeAttributes)}"); + } + if (excludeAttributes != null) + { + Console.WriteLine($" Excluding: {string.Join(", ", excludeAttributes)}"); + } + if (excludePatterns != null) + { + Console.WriteLine($" Excluding patterns: {string.Join(", ", excludePatterns)}"); + } + } + + await using var serviceProvider = ServiceFactory.CreateProvider(connection); + var generator = serviceProvider.GetRequiredService(); + var schemaWriter = serviceProvider.GetRequiredService(); + var progressReporter = ServiceFactory.CreateProgressReporter(json); + + var options = new SchemaGeneratorOptions + { + IncludeSystemFields = includeSystemFields, + IncludeRelationships = includeRelationships, + DisablePluginsByDefault = disablePlugins, + IncludeAttributes = includeAttributes, + ExcludeAttributes = excludeAttributes, + ExcludeAttributePatterns = excludePatterns + }; + + var schema = await generator.GenerateAsync( + entities, options, progressReporter, cancellationToken); + + await schemaWriter.WriteAsync(schema, output.FullName, cancellationToken); + + if (!json) + { + Console.WriteLine(); + Console.WriteLine("Schema generated successfully."); + Console.WriteLine($"Output: {output.FullName}"); + Console.WriteLine($"Entities: {schema.Entities.Count}"); + + var totalFields = schema.Entities.Sum(e => e.Fields.Count); + var totalRelationships = schema.Entities.Sum(e => e.Relationships.Count); + Console.WriteLine($"Fields: {totalFields}, Relationships: {totalRelationships}"); + } + + return ExitCodes.Success; + } + catch (OperationCanceledException) + { + ConsoleOutput.WriteError("Schema generation cancelled by user.", json); + return ExitCodes.Failure; + } + catch (Exception ex) + { + 
ConsoleOutput.WriteError($"Schema generation failed: {ex.Message}", json); + if (verbose) + { + Console.Error.WriteLine(ex.StackTrace); + } + return ExitCodes.Failure; + } + } + + private static async Task ExecuteListAsync( + string connection, + string? filter, + bool customOnly, + bool json, + CancellationToken cancellationToken) + { + try + { + if (!json) + { + Console.WriteLine("Retrieving available entities..."); + } + + await using var serviceProvider = ServiceFactory.CreateProvider(connection); + var generator = serviceProvider.GetRequiredService(); + + var entities = await generator.GetAvailableEntitiesAsync(cancellationToken); + + // Apply filters + var filtered = entities.AsEnumerable(); + + if (customOnly) + { + filtered = filtered.Where(e => e.IsCustomEntity); + } + + if (!string.IsNullOrEmpty(filter)) + { + var pattern = filter.Replace("*", ""); + if (filter.StartsWith('*') && filter.EndsWith('*')) + { + filtered = filtered.Where(e => e.LogicalName.Contains(pattern, StringComparison.OrdinalIgnoreCase)); + } + else if (filter.StartsWith('*')) + { + filtered = filtered.Where(e => e.LogicalName.EndsWith(pattern, StringComparison.OrdinalIgnoreCase)); + } + else if (filter.EndsWith('*')) + { + filtered = filtered.Where(e => e.LogicalName.StartsWith(pattern, StringComparison.OrdinalIgnoreCase)); + } + else + { + filtered = filtered.Where(e => e.LogicalName.Equals(filter, StringComparison.OrdinalIgnoreCase)); + } + } + + var result = filtered.ToList(); + + if (json) + { + var jsonOutput = System.Text.Json.JsonSerializer.Serialize(result, new System.Text.Json.JsonSerializerOptions + { + WriteIndented = true + }); + Console.WriteLine(jsonOutput); + } + else + { + Console.WriteLine(); + Console.WriteLine($"{"Logical Name",-40} {"Display Name",-40} {"Custom"}"); + Console.WriteLine(new string('-', 90)); + + foreach (var entity in result) + { + var customMarker = entity.IsCustomEntity ? 
"Yes" : ""; + Console.WriteLine($"{entity.LogicalName,-40} {entity.DisplayName,-40} {customMarker}"); + } + + Console.WriteLine(); + Console.WriteLine($"Total: {result.Count} entities"); + } + + return ExitCodes.Success; + } + catch (OperationCanceledException) + { + ConsoleOutput.WriteError("Operation cancelled by user.", json); + return ExitCodes.Failure; + } + catch (Exception ex) + { + ConsoleOutput.WriteError($"Failed to list entities: {ex.Message}", json); + return ExitCodes.Failure; + } + } +} diff --git a/src/PPDS.Migration.Cli/Infrastructure/ServiceFactory.cs b/src/PPDS.Migration.Cli/Infrastructure/ServiceFactory.cs new file mode 100644 index 00000000..30409544 --- /dev/null +++ b/src/PPDS.Migration.Cli/Infrastructure/ServiceFactory.cs @@ -0,0 +1,82 @@ +using Microsoft.Extensions.DependencyInjection; +using Microsoft.Extensions.Logging; +using PPDS.Dataverse.DependencyInjection; +using PPDS.Dataverse.Pooling; +using PPDS.Migration.Analysis; +using PPDS.Migration.DependencyInjection; +using PPDS.Migration.Export; +using PPDS.Migration.Formats; +using PPDS.Migration.Import; +using PPDS.Migration.Progress; + +namespace PPDS.Migration.Cli.Infrastructure; + +/// +/// Factory for creating configured service providers for CLI commands. +/// +public static class ServiceFactory +{ + /// + /// Creates a service provider configured with a single Dataverse connection. + /// + /// The Dataverse connection string. + /// Optional name for the connection. Default: "Primary" + /// A configured service provider. 
+ public static ServiceProvider CreateProvider(string connectionString, string connectionName = "Primary") + { + var services = new ServiceCollection(); + + // Add logging (minimal for CLI - no console output to avoid interfering with CLI) + services.AddLogging(builder => + { + builder.SetMinimumLevel(LogLevel.Warning); + }); + + // Add Dataverse connection pool + services.AddDataverseConnectionPool(options => + { + options.Connections.Add(new DataverseConnection(connectionName, connectionString)); + options.Pool.Enabled = true; + options.Pool.MaxPoolSize = Environment.ProcessorCount * 4; + options.Pool.DisableAffinityCookie = true; + }); + + // Add migration services + services.AddDataverseMigration(); + + return services.BuildServiceProvider(); + } + + /// + /// Creates a service provider for schema analysis (no connection required). + /// + /// A configured service provider with schema reading capabilities. + public static ServiceProvider CreateAnalysisProvider() + { + var services = new ServiceCollection(); + + // Add logging (minimal for CLI) + services.AddLogging(builder => + { + builder.SetMinimumLevel(LogLevel.Warning); + }); + + // Register only the analysis components (no Dataverse connection needed) + services.AddTransient(); + services.AddTransient(); + + return services.BuildServiceProvider(); + } + + /// + /// Creates a progress reporter based on the output mode. + /// + /// Whether to output JSON format. + /// An appropriate progress reporter. + public static IProgressReporter CreateProgressReporter(bool useJson) + { + return useJson + ? 
new JsonProgressReporter(Console.Out) + : new ConsoleProgressReporter(); + } +} diff --git a/src/PPDS.Migration.Cli/PPDS.Migration.Cli.csproj b/src/PPDS.Migration.Cli/PPDS.Migration.Cli.csproj index bf1d0101..f1e322a1 100644 --- a/src/PPDS.Migration.Cli/PPDS.Migration.Cli.csproj +++ b/src/PPDS.Migration.Cli/PPDS.Migration.Cli.csproj @@ -34,7 +34,7 @@ - + diff --git a/src/PPDS.Migration.Cli/Program.cs b/src/PPDS.Migration.Cli/Program.cs index 951000d9..22c14f2a 100644 --- a/src/PPDS.Migration.Cli/Program.cs +++ b/src/PPDS.Migration.Cli/Program.cs @@ -20,6 +20,7 @@ public static async Task Main(string[] args) rootCommand.AddCommand(ImportCommand.Create()); rootCommand.AddCommand(AnalyzeCommand.Create()); rootCommand.AddCommand(MigrateCommand.Create()); + rootCommand.AddCommand(SchemaCommand.Create()); // Handle cancellation using var cts = new CancellationTokenSource(); diff --git a/src/PPDS.Migration/Analysis/DependencyGraphBuilder.cs b/src/PPDS.Migration/Analysis/DependencyGraphBuilder.cs index a2d4df97..56c35b4b 100644 --- a/src/PPDS.Migration/Analysis/DependencyGraphBuilder.cs +++ b/src/PPDS.Migration/Analysis/DependencyGraphBuilder.cs @@ -65,28 +65,33 @@ public DependencyGraph Build(MigrationSchema schema) continue; } - // Only add edge if target entity is in schema - if (!entitySet.Contains(field.LookupEntity)) - { - _logger?.LogDebug("Ignoring lookup {Entity}.{Field} -> {Target} (not in schema)", - entity.LogicalName, field.LogicalName, field.LookupEntity); - continue; - } - - var dependencyType = field.Type.ToLowerInvariant() switch - { - "owner" => DependencyType.Owner, - "customer" => DependencyType.Customer, - _ => DependencyType.Lookup - }; - - edges.Add(new DependencyEdge - { - FromEntity = entity.LogicalName, - ToEntity = field.LookupEntity, - FieldName = field.LogicalName, - Type = dependencyType - }); + // Handle polymorphic lookups (e.g., "account|contact") + var targetEntities = field.LookupEntity.Split('|') + .Select(t => t.Trim()) + 
.Where(trimmedTarget => + { + if (!entitySet.Contains(trimmedTarget)) + { + _logger?.LogDebug("Ignoring lookup {Entity}.{Field} -> {Target} (not in schema)", + entity.LogicalName, field.LogicalName, trimmedTarget); + return false; + } + return true; + }) + .Select(trimmedTarget => new DependencyEdge + { + FromEntity = entity.LogicalName, + ToEntity = trimmedTarget, + FieldName = field.LogicalName, + Type = field.Type.ToLowerInvariant() switch + { + "owner" => DependencyType.Owner, + "customer" => DependencyType.Customer, + _ => DependencyType.Lookup + } + }); + + edges.AddRange(targetEntities); } } diff --git a/src/PPDS.Migration/DependencyInjection/ServiceCollectionExtensions.cs b/src/PPDS.Migration/DependencyInjection/ServiceCollectionExtensions.cs index f78a0281..a17cb93f 100644 --- a/src/PPDS.Migration/DependencyInjection/ServiceCollectionExtensions.cs +++ b/src/PPDS.Migration/DependencyInjection/ServiceCollectionExtensions.cs @@ -7,6 +7,7 @@ using PPDS.Migration.Formats; using PPDS.Migration.Import; using PPDS.Migration.Progress; +using PPDS.Migration.Schema; namespace PPDS.Migration.DependencyInjection { @@ -42,9 +43,13 @@ public static IServiceCollection AddDataverseMigration( // Formats services.AddTransient(); + services.AddTransient(); services.AddTransient(); services.AddTransient(); + // Schema generation + services.AddTransient(); + // Analysis services.AddTransient(); services.AddTransient(); @@ -53,6 +58,7 @@ public static IServiceCollection AddDataverseMigration( services.AddTransient(); // Import + services.AddTransient(); services.AddTransient(); // Progress reporters diff --git a/src/PPDS.Migration/Export/ParallelExporter.cs b/src/PPDS.Migration/Export/ParallelExporter.cs index 095b68fe..874ebfdf 100644 --- a/src/PPDS.Migration/Export/ParallelExporter.cs +++ b/src/PPDS.Migration/Export/ParallelExporter.cs @@ -135,6 +135,16 @@ await Parallel.ForEachAsync( } }).ConfigureAwait(false); + // Export M2M relationships + progress?.Report(new 
ProgressEventArgs + { + Phase = MigrationPhase.Exporting, + Message = "Exporting M2M relationships..." + }); + + var relationshipData = await ExportM2MRelationshipsAsync( + schema, entityData, options, progress, cancellationToken).ConfigureAwait(false); + // Write to output file progress?.Report(new ProgressEventArgs { @@ -146,6 +156,7 @@ await Parallel.ForEachAsync( { Schema = schema, EntityData = entityData, + RelationshipData = relationshipData, ExportedAt = DateTime.UtcNow }; @@ -218,7 +229,7 @@ private async Task ExportEntityAsync( { _logger?.LogDebug("Exporting entity {Entity}", entitySchema.LogicalName); - await using var client = await _connectionPool.GetClientAsync(null, cancellationToken).ConfigureAwait(false); + await using var client = await _connectionPool.GetClientAsync(null, cancellationToken: cancellationToken).ConfigureAwait(false); // Build FetchXML var fetchXml = BuildFetchXml(entitySchema, options.PageSize); @@ -296,6 +307,136 @@ private async Task ExportEntityAsync( } } + private async Task>> ExportM2MRelationshipsAsync( + MigrationSchema schema, + ConcurrentDictionary> entityData, + ExportOptions options, + IProgressReporter? 
progress, + CancellationToken cancellationToken) + { + var result = new Dictionary>(StringComparer.OrdinalIgnoreCase); + + foreach (var entitySchema in schema.Entities) + { + cancellationToken.ThrowIfCancellationRequested(); + + var m2mRelationships = entitySchema.Relationships.Where(r => r.IsManyToMany).ToList(); + if (m2mRelationships.Count == 0) + { + continue; + } + + // Only export M2M for records we actually exported + if (!entityData.TryGetValue(entitySchema.LogicalName, out var exportedRecords) || exportedRecords.Count == 0) + { + continue; + } + + var exportedIds = exportedRecords.Select(r => r.Id).ToHashSet(); + var entityM2MData = new List(); + + foreach (var rel in m2mRelationships) + { + progress?.Report(new ProgressEventArgs + { + Phase = MigrationPhase.Exporting, + Entity = entitySchema.LogicalName, + Message = $"Exporting M2M relationship {rel.Name}..." + }); + + try + { + var relData = await ExportM2MRelationshipAsync( + entitySchema, rel, exportedIds, options, cancellationToken).ConfigureAwait(false); + entityM2MData.AddRange(relData); + } + catch (Exception ex) when (ex is not OperationCanceledException) + { + _logger?.LogWarning(ex, "Failed to export M2M relationship {Relationship} for entity {Entity}", + rel.Name, entitySchema.LogicalName); + } + } + + if (entityM2MData.Count > 0) + { + result[entitySchema.LogicalName] = entityM2MData; + _logger?.LogDebug("Exported {Count} M2M relationship groups for entity {Entity}", + entityM2MData.Count, entitySchema.LogicalName); + } + } + + return result; + } + + private async Task> ExportM2MRelationshipAsync( + EntitySchema entitySchema, + RelationshipSchema rel, + HashSet exportedSourceIds, + ExportOptions options, + CancellationToken cancellationToken) + { + await using var client = await _connectionPool.GetClientAsync(null, cancellationToken: cancellationToken).ConfigureAwait(false); + + // Query intersect entity to get all associations + var intersectEntity = rel.IntersectEntity ?? 
rel.Name; + var sourceIdField = $"{entitySchema.LogicalName}id"; + var targetIdField = rel.TargetEntityPrimaryKey ?? $"{rel.Entity2}id"; + + // Build FetchXML to query intersect entity + var fetchXml = $@" + + + + + "; + + var pageNumber = 1; + string? pagingCookie = null; + var associations = new List<(Guid SourceId, Guid TargetId)>(); + + while (true) + { + cancellationToken.ThrowIfCancellationRequested(); + + var pagedFetchXml = AddPaging(fetchXml, pageNumber, pagingCookie); + var response = await client.RetrieveMultipleAsync(new FetchExpression(pagedFetchXml)).ConfigureAwait(false); + + // Only include associations where both fields exist and source was exported + var validAssociations = response.Entities + .Where(entity => entity.Contains(sourceIdField) && entity.Contains(targetIdField)) + .Select(entity => ( + SourceId: entity.GetAttributeValue(sourceIdField), + TargetId: entity.GetAttributeValue(targetIdField))) + .Where(assoc => exportedSourceIds.Contains(assoc.SourceId)); + + associations.AddRange(validAssociations); + + if (!response.MoreRecords) + { + break; + } + + pagingCookie = response.PagingCookie; + pageNumber++; + } + + // Group by source ID (CMT format requirement) + var grouped = associations + .GroupBy(x => x.SourceId) + .Select(g => new ManyToManyRelationshipData + { + RelationshipName = rel.Name, + SourceEntityName = entitySchema.LogicalName, + SourceId = g.Key, + TargetEntityName = rel.Entity2, + TargetEntityPrimaryKey = targetIdField, + TargetIds = g.Select(x => x.TargetId).ToList() + }) + .ToList(); + + return grouped; + } + private string BuildFetchXml(EntitySchema entitySchema, int pageSize) { var fetch = new XElement("fetch", diff --git a/src/PPDS.Migration/Formats/CmtDataReader.cs b/src/PPDS.Migration/Formats/CmtDataReader.cs index 8b4a9927..6ec4fece 100644 --- a/src/PPDS.Migration/Formats/CmtDataReader.cs +++ b/src/PPDS.Migration/Formats/CmtDataReader.cs @@ -106,20 +106,22 @@ public async Task ReadAsync(Stream stream, 
IProgressReporter? pro await dataStream.CopyToAsync(dataMemoryStream, cancellationToken).ConfigureAwait(false); dataMemoryStream.Position = 0; - var entityData = await ParseDataXmlAsync(dataMemoryStream, schema, progress, cancellationToken).ConfigureAwait(false); + var (entityData, relationshipData) = await ParseDataXmlAsync(dataMemoryStream, schema, progress, cancellationToken).ConfigureAwait(false); - _logger?.LogInformation("Parsed data with {RecordCount} total records", entityData.Values.Sum(v => v.Count)); + _logger?.LogInformation("Parsed data with {RecordCount} total records and {M2MCount} M2M relationship groups", + entityData.Values.Sum(v => v.Count), + relationshipData.Values.Sum(v => v.Count)); return new MigrationData { Schema = schema, EntityData = entityData, - RelationshipData = new Dictionary>(), + RelationshipData = relationshipData, ExportedAt = DateTime.UtcNow }; } - private async Task>> ParseDataXmlAsync( + private async Task<(IReadOnlyDictionary>, IReadOnlyDictionary>)> ParseDataXmlAsync( Stream stream, MigrationSchema schema, IProgressReporter? progress, @@ -137,7 +139,8 @@ private async Task>> ParseData ? root : root.Element("entities") ?? 
throw new InvalidOperationException("Data XML has no element"); - var result = new Dictionary>(StringComparer.OrdinalIgnoreCase); + var entityResult = new Dictionary>(StringComparer.OrdinalIgnoreCase); + var relationshipResult = new Dictionary>(StringComparer.OrdinalIgnoreCase); foreach (var entityElement in entitiesElement.Elements("entity")) { @@ -163,9 +166,61 @@ private async Task>> ParseData if (records.Count > 0) { - result[entityName] = records; + entityResult[entityName] = records; _logger?.LogDebug("Parsed {Count} records for entity {Entity}", records.Count, entityName); } + + // Parse M2M relationships + var m2mElement = entityElement.Element("m2mrelationships"); + if (m2mElement != null) + { + var m2mData = ParseM2MRelationships(m2mElement, entityName); + if (m2mData.Count > 0) + { + relationshipResult[entityName] = m2mData; + _logger?.LogDebug("Parsed {Count} M2M relationship groups for entity {Entity}", m2mData.Count, entityName); + } + } + } + + return (entityResult, relationshipResult); + } + + private List ParseM2MRelationships(XElement element, string sourceEntityName) + { + var result = new List(); + + foreach (var m2mRel in element.Elements("m2mrelationship")) + { + var sourceId = m2mRel.Attribute("sourceid")?.Value; + var targetEntityName = m2mRel.Attribute("targetentityname")?.Value; + var targetEntityPrimaryKey = m2mRel.Attribute("targetentitynameidfield")?.Value; + var relationshipName = m2mRel.Attribute("m2mrelationshipname")?.Value; + + if (string.IsNullOrEmpty(sourceId) || !Guid.TryParse(sourceId, out var sourceGuid)) + { + continue; + } + + var targetIdsElement = m2mRel.Element("targetids"); + var targetIds = targetIdsElement?.Elements("targetid") + .Select(e => Guid.TryParse(e.Value, out var g) ? g : (Guid?)null) + .Where(g => g.HasValue) + .Select(g => g!.Value) + .ToList() ?? new List(); + + if (targetIds.Count > 0) + { + result.Add(new ManyToManyRelationshipData + { + RelationshipName = relationshipName ?? 
string.Empty, + SourceEntityName = sourceEntityName, + SourceId = sourceGuid, + TargetEntityName = targetEntityName ?? string.Empty, + TargetEntityPrimaryKey = targetEntityPrimaryKey ?? string.Empty, + TargetIds = targetIds + }); + } } return result; @@ -220,10 +275,10 @@ private async Task>> ParseData "int" or "integer" => int.TryParse(value, out var i) ? i : null, "decimal" or "money" => decimal.TryParse(value, NumberStyles.Any, CultureInfo.InvariantCulture, out var d) ? d : null, "float" or "double" => double.TryParse(value, NumberStyles.Any, CultureInfo.InvariantCulture, out var f) ? f : null, - "bool" or "boolean" => bool.TryParse(value, out var b) ? b : null, + "bool" or "boolean" => value == "1" || (bool.TryParse(value, out var b) && b), "datetime" => DateTime.TryParse(value, CultureInfo.InvariantCulture, DateTimeStyles.AssumeUniversal, out var dt) ? dt : null, "guid" or "uniqueidentifier" => Guid.TryParse(value, out var g) ? g : null, - "lookup" or "customer" or "owner" => ParseEntityReference(element), + "lookup" or "customer" or "owner" or "entityreference" => ParseEntityReference(element), "optionset" or "picklist" => ParseOptionSetValue(value), "state" or "status" => ParseOptionSetValue(value), _ => value // Return as string for unknown types @@ -232,9 +287,16 @@ private async Task>> ParseData private EntityReference? ParseEntityReference(XElement element) { - var idValue = element.Attribute("value")?.Value ?? element.Attribute("id")?.Value; - var entityName = element.Attribute("lookupentity")?.Value ?? element.Attribute("type")?.Value; - var name = element.Attribute("lookupentityname")?.Value ?? element.Attribute("name")?.Value; + // CMT format: GUID is element content, entity name is lookupentity attribute + var idValue = element.Value; // Element content (CMT format) + if (string.IsNullOrEmpty(idValue)) + { + // Fallback for other formats + idValue = element.Attribute("value")?.Value ?? 
element.Attribute("id")?.Value; + } + + var entityName = element.Attribute("lookupentity")?.Value; + var name = element.Attribute("lookupentityname")?.Value; if (string.IsNullOrEmpty(idValue) || !Guid.TryParse(idValue, out var id)) { diff --git a/src/PPDS.Migration/Formats/CmtDataWriter.cs b/src/PPDS.Migration/Formats/CmtDataWriter.cs index 4945e11b..94d33280 100644 --- a/src/PPDS.Migration/Formats/CmtDataWriter.cs +++ b/src/PPDS.Migration/Formats/CmtDataWriter.cs @@ -2,6 +2,7 @@ using System.Globalization; using System.IO; using System.IO.Compression; +using System.Linq; using System.Text; using System.Threading; using System.Threading.Tasks; @@ -60,6 +61,13 @@ public async Task WriteAsync(MigrationData data, Stream stream, IProgressReporte using var archive = new ZipArchive(stream, ZipArchiveMode.Create, leaveOpen: true); + // Write [Content_Types].xml (required by CMT) + var contentTypesEntry = archive.CreateEntry("[Content_Types].xml", CompressionLevel.Optimal); + using (var contentTypesStream = contentTypesEntry.Open()) + { + await WriteContentTypesAsync(contentTypesStream).ConfigureAwait(false); + } + // Write data.xml progress?.Report(new ProgressEventArgs { @@ -89,6 +97,32 @@ public async Task WriteAsync(MigrationData data, Stream stream, IProgressReporte _logger?.LogInformation("Wrote {RecordCount} total records", data.TotalRecordCount); } + private static async Task WriteContentTypesAsync(Stream stream) + { + var settings = new XmlWriterSettings + { + Async = true, + Indent = true, + Encoding = new UTF8Encoding(false) + }; + +#if NET8_0_OR_GREATER + await using var writer = XmlWriter.Create(stream, settings); +#else + using var writer = XmlWriter.Create(stream, settings); +#endif + + await writer.WriteStartDocumentAsync().ConfigureAwait(false); + await writer.WriteStartElementAsync(null, "Types", "http://schemas.openxmlformats.org/package/2006/content-types").ConfigureAwait(false); + await writer.WriteStartElementAsync(null, "Default", 
null).ConfigureAwait(false); + await writer.WriteAttributeStringAsync(null, "Extension", null, "xml").ConfigureAwait(false); + await writer.WriteAttributeStringAsync(null, "ContentType", null, "application/octet-stream").ConfigureAwait(false); + await writer.WriteEndElementAsync().ConfigureAwait(false); // Default + await writer.WriteEndElementAsync().ConfigureAwait(false); // Types + await writer.WriteEndDocumentAsync().ConfigureAwait(false); + await writer.FlushAsync().ConfigureAwait(false); + } + private async Task WriteDataXmlAsync(MigrationData data, Stream stream, IProgressReporter? progress, CancellationToken cancellationToken) { var settings = new XmlWriterSettings @@ -106,15 +140,21 @@ private async Task WriteDataXmlAsync(MigrationData data, Stream stream, IProgres await writer.WriteStartDocumentAsync().ConfigureAwait(false); await writer.WriteStartElementAsync(null, "entities", null).ConfigureAwait(false); - await writer.WriteAttributeStringAsync(null, "timestamp", null, DateTime.UtcNow.ToString("O")).ConfigureAwait(false); + await writer.WriteAttributeStringAsync("xmlns", "xsd", null, "http://www.w3.org/2001/XMLSchema").ConfigureAwait(false); + await writer.WriteAttributeStringAsync("xmlns", "xsi", null, "http://www.w3.org/2001/XMLSchema-instance").ConfigureAwait(false); + await writer.WriteAttributeStringAsync(null, "timestamp", null, DateTime.UtcNow.ToString("yyyy-MM-ddTHH:mm:ss.fffffffZ")).ConfigureAwait(false); foreach (var (entityName, records) in data.EntityData) { cancellationToken.ThrowIfCancellationRequested(); + // Get display name from schema + var entitySchema = data.Schema.Entities.FirstOrDefault(e => e.LogicalName == entityName); + var displayName = entitySchema?.DisplayName ?? 
entityName; + await writer.WriteStartElementAsync(null, "entity", null).ConfigureAwait(false); await writer.WriteAttributeStringAsync(null, "name", null, entityName).ConfigureAwait(false); - await writer.WriteAttributeStringAsync(null, "recordcount", null, records.Count.ToString()).ConfigureAwait(false); + await writer.WriteAttributeStringAsync(null, "displayname", null, displayName).ConfigureAwait(false); await writer.WriteStartElementAsync(null, "records", null).ConfigureAwait(false); @@ -124,6 +164,18 @@ private async Task WriteDataXmlAsync(MigrationData data, Stream stream, IProgres } await writer.WriteEndElementAsync().ConfigureAwait(false); // records + + // Write m2mrelationships + await writer.WriteStartElementAsync(null, "m2mrelationships", null).ConfigureAwait(false); + if (data.RelationshipData.TryGetValue(entityName, out var m2mList)) + { + foreach (var m2m in m2mList) + { + await WriteM2MRelationshipAsync(writer, m2m).ConfigureAwait(false); + } + } + await writer.WriteEndElementAsync().ConfigureAwait(false); // m2mrelationships + await writer.WriteEndElementAsync().ConfigureAwait(false); // entity } @@ -139,11 +191,6 @@ private async Task WriteRecordAsync(XmlWriter writer, Entity record) foreach (var attribute in record.Attributes) { - if (attribute.Key == record.LogicalName + "id") - { - continue; // Skip primary ID field as it's in the record id attribute - } - await WriteFieldAsync(writer, attribute.Key, attribute.Value).ConfigureAwait(false); } @@ -160,10 +207,11 @@ private async Task WriteFieldAsync(XmlWriter writer, string name, object? 
value) await writer.WriteStartElementAsync(null, "field", null).ConfigureAwait(false); await writer.WriteAttributeStringAsync(null, "name", null, name).ConfigureAwait(false); + // CMT format: value in attribute, type-specific additional attributes + string stringValue; switch (value) { case EntityReference er: - await writer.WriteAttributeStringAsync(null, "type", null, "lookup").ConfigureAwait(false); await writer.WriteAttributeStringAsync(null, "value", null, er.Id.ToString()).ConfigureAwait(false); await writer.WriteAttributeStringAsync(null, "lookupentity", null, er.LogicalName).ConfigureAwait(false); if (!string.IsNullOrEmpty(er.Name)) @@ -173,54 +221,65 @@ private async Task WriteFieldAsync(XmlWriter writer, string name, object? value) break; case OptionSetValue osv: - await writer.WriteAttributeStringAsync(null, "type", null, "optionset").ConfigureAwait(false); await writer.WriteAttributeStringAsync(null, "value", null, osv.Value.ToString()).ConfigureAwait(false); break; case Money m: - await writer.WriteAttributeStringAsync(null, "type", null, "money").ConfigureAwait(false); await writer.WriteAttributeStringAsync(null, "value", null, m.Value.ToString(CultureInfo.InvariantCulture)).ConfigureAwait(false); break; case DateTime dt: - await writer.WriteAttributeStringAsync(null, "type", null, "datetime").ConfigureAwait(false); - await writer.WriteAttributeStringAsync(null, "value", null, dt.ToString("O")).ConfigureAwait(false); + // CMT uses ISO 8601 format with 7 decimal places + stringValue = dt.ToUniversalTime().ToString("yyyy-MM-ddTHH:mm:ss.fffffffZ", CultureInfo.InvariantCulture); + await writer.WriteAttributeStringAsync(null, "value", null, stringValue).ConfigureAwait(false); break; case bool b: - await writer.WriteAttributeStringAsync(null, "type", null, "bool").ConfigureAwait(false); - await writer.WriteAttributeStringAsync(null, "value", null, b.ToString().ToLowerInvariant()).ConfigureAwait(false); + await writer.WriteAttributeStringAsync(null, "value", 
null, b ? "1" : "0").ConfigureAwait(false); break; case Guid g: - await writer.WriteAttributeStringAsync(null, "type", null, "guid").ConfigureAwait(false); await writer.WriteAttributeStringAsync(null, "value", null, g.ToString()).ConfigureAwait(false); break; case int i: - await writer.WriteAttributeStringAsync(null, "type", null, "int").ConfigureAwait(false); await writer.WriteAttributeStringAsync(null, "value", null, i.ToString()).ConfigureAwait(false); break; case decimal d: - await writer.WriteAttributeStringAsync(null, "type", null, "decimal").ConfigureAwait(false); await writer.WriteAttributeStringAsync(null, "value", null, d.ToString(CultureInfo.InvariantCulture)).ConfigureAwait(false); break; case double dbl: - await writer.WriteAttributeStringAsync(null, "type", null, "float").ConfigureAwait(false); await writer.WriteAttributeStringAsync(null, "value", null, dbl.ToString(CultureInfo.InvariantCulture)).ConfigureAwait(false); break; default: - await writer.WriteAttributeStringAsync(null, "type", null, "string").ConfigureAwait(false); - await writer.WriteAttributeStringAsync(null, "value", null, value.ToString()).ConfigureAwait(false); + await writer.WriteAttributeStringAsync(null, "value", null, value.ToString() ?? 
string.Empty).ConfigureAwait(false); break; } await writer.WriteEndElementAsync().ConfigureAwait(false); // field } + private async Task WriteM2MRelationshipAsync(XmlWriter writer, ManyToManyRelationshipData m2m) + { + await writer.WriteStartElementAsync(null, "m2mrelationship", null).ConfigureAwait(false); + await writer.WriteAttributeStringAsync(null, "sourceid", null, m2m.SourceId.ToString()).ConfigureAwait(false); + await writer.WriteAttributeStringAsync(null, "targetentityname", null, m2m.TargetEntityName).ConfigureAwait(false); + await writer.WriteAttributeStringAsync(null, "targetentitynameidfield", null, m2m.TargetEntityPrimaryKey).ConfigureAwait(false); + await writer.WriteAttributeStringAsync(null, "m2mrelationshipname", null, m2m.RelationshipName).ConfigureAwait(false); + + await writer.WriteStartElementAsync(null, "targetids", null).ConfigureAwait(false); + foreach (var targetId in m2m.TargetIds) + { + await writer.WriteElementStringAsync(null, "targetid", null, targetId.ToString()).ConfigureAwait(false); + } + await writer.WriteEndElementAsync().ConfigureAwait(false); // targetids + + await writer.WriteEndElementAsync().ConfigureAwait(false); // m2mrelationship + } + private async Task WriteSchemaXmlAsync(MigrationSchema schema, Stream stream, CancellationToken cancellationToken) { var settings = new XmlWriterSettings @@ -238,8 +297,6 @@ private async Task WriteSchemaXmlAsync(MigrationSchema schema, Stream stream, Ca await writer.WriteStartDocumentAsync().ConfigureAwait(false); await writer.WriteStartElementAsync(null, "entities", null).ConfigureAwait(false); - await writer.WriteAttributeStringAsync(null, "version", null, schema.Version).ConfigureAwait(false); - await writer.WriteAttributeStringAsync(null, "timestamp", null, DateTime.UtcNow.ToString("O")).ConfigureAwait(false); foreach (var entity in schema.Entities) { @@ -248,6 +305,7 @@ private async Task WriteSchemaXmlAsync(MigrationSchema schema, Stream stream, Ca await 
writer.WriteStartElementAsync(null, "entity", null).ConfigureAwait(false); await writer.WriteAttributeStringAsync(null, "name", null, entity.LogicalName).ConfigureAwait(false); await writer.WriteAttributeStringAsync(null, "displayname", null, entity.DisplayName).ConfigureAwait(false); + await writer.WriteAttributeStringAsync(null, "etc", null, (entity.ObjectTypeCode ?? 0).ToString()).ConfigureAwait(false); await writer.WriteAttributeStringAsync(null, "primaryidfield", null, entity.PrimaryIdField).ConfigureAwait(false); await writer.WriteAttributeStringAsync(null, "primarynamefield", null, entity.PrimaryNameField).ConfigureAwait(false); await writer.WriteAttributeStringAsync(null, "disableplugins", null, entity.DisablePlugins.ToString().ToLowerInvariant()).ConfigureAwait(false); @@ -257,31 +315,25 @@ private async Task WriteSchemaXmlAsync(MigrationSchema schema, Stream stream, Ca foreach (var field in entity.Fields) { await writer.WriteStartElementAsync(null, "field", null).ConfigureAwait(false); - await writer.WriteAttributeStringAsync(null, "name", null, field.LogicalName).ConfigureAwait(false); await writer.WriteAttributeStringAsync(null, "displayname", null, field.DisplayName).ConfigureAwait(false); + await writer.WriteAttributeStringAsync(null, "name", null, field.LogicalName).ConfigureAwait(false); await writer.WriteAttributeStringAsync(null, "type", null, field.Type).ConfigureAwait(false); + if (field.IsPrimaryKey) + { + await writer.WriteAttributeStringAsync(null, "primaryKey", null, "true").ConfigureAwait(false); + } if (!string.IsNullOrEmpty(field.LookupEntity)) { await writer.WriteAttributeStringAsync(null, "lookupType", null, field.LookupEntity).ConfigureAwait(false); } - await writer.WriteAttributeStringAsync(null, "customfield", null, field.IsCustomField.ToString().ToLowerInvariant()).ConfigureAwait(false); await writer.WriteEndElementAsync().ConfigureAwait(false); // field } await writer.WriteEndElementAsync().ConfigureAwait(false); // fields - // 
Write relationships - if (entity.Relationships.Count > 0) + // Write filter if present (HTML-encoded) + if (!string.IsNullOrEmpty(entity.FetchXmlFilter)) { - await writer.WriteStartElementAsync(null, "relationships", null).ConfigureAwait(false); - foreach (var rel in entity.Relationships) - { - await writer.WriteStartElementAsync(null, "relationship", null).ConfigureAwait(false); - await writer.WriteAttributeStringAsync(null, "name", null, rel.Name).ConfigureAwait(false); - await writer.WriteAttributeStringAsync(null, "m2m", null, rel.IsManyToMany.ToString().ToLowerInvariant()).ConfigureAwait(false); - await writer.WriteAttributeStringAsync(null, "relatedEntityName", null, rel.Entity2).ConfigureAwait(false); - await writer.WriteEndElementAsync().ConfigureAwait(false); // relationship - } - await writer.WriteEndElementAsync().ConfigureAwait(false); // relationships + await writer.WriteElementStringAsync(null, "filter", null, entity.FetchXmlFilter).ConfigureAwait(false); } await writer.WriteEndElementAsync().ConfigureAwait(false); // entity diff --git a/src/PPDS.Migration/Formats/CmtSchemaReader.cs b/src/PPDS.Migration/Formats/CmtSchemaReader.cs index 389ca639..ffe94437 100644 --- a/src/PPDS.Migration/Formats/CmtSchemaReader.cs +++ b/src/PPDS.Migration/Formats/CmtSchemaReader.cs @@ -107,6 +107,7 @@ private EntitySchema ParseEntity(XElement element) { var logicalName = element.Attribute("name")?.Value ?? string.Empty; var displayName = element.Attribute("displayname")?.Value ?? logicalName; + var objectTypeCode = ParseInt(element.Attribute("etc")?.Value); var primaryIdField = element.Attribute("primaryidfield")?.Value ?? $"{logicalName}id"; var primaryNameField = element.Attribute("primarynamefield")?.Value ?? 
"name"; var disablePlugins = ParseBool(element.Attribute("disableplugins")?.Value); @@ -144,7 +145,7 @@ private EntitySchema ParseEntity(XElement element) PrimaryIdField = primaryIdField, PrimaryNameField = primaryNameField, DisablePlugins = disablePlugins, - ObjectTypeCode = ParseInt(element.Attribute("objecttypecode")?.Value), + ObjectTypeCode = objectTypeCode, Fields = fields, Relationships = relationships, FetchXmlFilter = fetchXmlFilter @@ -159,6 +160,7 @@ private FieldSchema ParseField(XElement element) var lookupEntity = element.Attribute("lookupType")?.Value; var isCustomField = ParseBool(element.Attribute("customfield")?.Value); var isRequired = ParseBool(element.Attribute("isrequired")?.Value); + var isPrimaryKey = ParseBool(element.Attribute("primaryKey")?.Value); return new FieldSchema { @@ -168,6 +170,7 @@ private FieldSchema ParseField(XElement element) LookupEntity = lookupEntity, IsCustomField = isCustomField, IsRequired = isRequired, + IsPrimaryKey = isPrimaryKey, MaxLength = ParseInt(element.Attribute("maxlength")?.Value), Precision = ParseInt(element.Attribute("precision")?.Value) }; @@ -176,19 +179,49 @@ private FieldSchema ParseField(XElement element) private RelationshipSchema ParseRelationship(XElement element, string parentEntity) { var name = element.Attribute("name")?.Value ?? string.Empty; - var isManyToMany = ParseBool(element.Attribute("m2m")?.Value); + var isManyToMany = ParseBool(element.Attribute("manyToMany")?.Value); + var isReflexive = ParseBool(element.Attribute("isreflexive")?.Value); var relatedEntity = element.Attribute("relatedEntityName")?.Value ?? string.Empty; - var intersectEntity = element.Attribute("intersectentity")?.Value; + var intersectEntity = element.Attribute("intersectEntityName")?.Value; + + string entity1, entity1Attribute, entity2, entity2Attribute; + string? 
targetEntityPrimaryKey = null; + + if (isManyToMany) + { + // M2M relationship + // In CMT format: relatedEntityName is the intersect entity, m2mTargetEntity is the target + entity1 = parentEntity; + entity1Attribute = string.Empty; + entity2 = element.Attribute("m2mTargetEntity")?.Value ?? relatedEntity; + entity2Attribute = string.Empty; + targetEntityPrimaryKey = element.Attribute("m2mTargetEntityPrimaryKey")?.Value; + // If intersectEntity not explicitly set, use relatedEntityName for M2M + if (string.IsNullOrEmpty(intersectEntity)) + { + intersectEntity = relatedEntity; + } + } + else + { + // One-to-Many relationship + entity1 = element.Attribute("referencingEntity")?.Value ?? parentEntity; + entity1Attribute = element.Attribute("referencingAttribute")?.Value ?? string.Empty; + entity2 = element.Attribute("referencedEntity")?.Value ?? relatedEntity; + entity2Attribute = element.Attribute("referencedAttribute")?.Value ?? string.Empty; + } return new RelationshipSchema { Name = name, - Entity1 = parentEntity, - Entity1Attribute = element.Attribute("entity1attribute")?.Value ?? string.Empty, - Entity2 = relatedEntity, - Entity2Attribute = element.Attribute("entity2attribute")?.Value ?? 
string.Empty, + Entity1 = entity1, + Entity1Attribute = entity1Attribute, + Entity2 = entity2, + Entity2Attribute = entity2Attribute, IsManyToMany = isManyToMany, - IntersectEntity = intersectEntity + IsReflexive = isReflexive, + IntersectEntity = intersectEntity, + TargetEntityPrimaryKey = targetEntityPrimaryKey }; } diff --git a/src/PPDS.Migration/Formats/CmtSchemaWriter.cs b/src/PPDS.Migration/Formats/CmtSchemaWriter.cs new file mode 100644 index 00000000..5e8a5759 --- /dev/null +++ b/src/PPDS.Migration/Formats/CmtSchemaWriter.cs @@ -0,0 +1,175 @@ +using System; +using System.IO; +using System.Text; +using System.Threading; +using System.Threading.Tasks; +using System.Xml; +using Microsoft.Extensions.Logging; +using PPDS.Migration.Models; + +namespace PPDS.Migration.Formats +{ + /// + /// Writes CMT-compatible schema files. + /// + public class CmtSchemaWriter : ICmtSchemaWriter + { + private readonly ILogger? _logger; + + /// + /// Initializes a new instance of the class. + /// + public CmtSchemaWriter() + { + } + + /// + /// Initializes a new instance of the class. + /// + /// The logger. 
+ public CmtSchemaWriter(ILogger logger) + { + _logger = logger; + } + + /// + public async Task WriteAsync(MigrationSchema schema, string path, CancellationToken cancellationToken = default) + { + if (schema == null) throw new ArgumentNullException(nameof(schema)); + if (string.IsNullOrEmpty(path)) throw new ArgumentNullException(nameof(path)); + + _logger?.LogInformation("Writing schema to {Path}", path); + +#if NET8_0_OR_GREATER + await using var stream = new FileStream(path, FileMode.Create, FileAccess.Write, FileShare.None, 4096, FileOptions.Asynchronous); +#else + using var stream = new FileStream(path, FileMode.Create, FileAccess.Write, FileShare.None, 4096, FileOptions.Asynchronous); +#endif + await WriteAsync(schema, stream, cancellationToken).ConfigureAwait(false); + } + + /// + public async Task WriteAsync(MigrationSchema schema, Stream stream, CancellationToken cancellationToken = default) + { + if (schema == null) throw new ArgumentNullException(nameof(schema)); + if (stream == null) throw new ArgumentNullException(nameof(stream)); + + var settings = new XmlWriterSettings + { + Async = true, + Indent = true, + Encoding = new UTF8Encoding(false) + }; + +#if NET8_0_OR_GREATER + await using var writer = XmlWriter.Create(stream, settings); +#else + using var writer = XmlWriter.Create(stream, settings); +#endif + + await writer.WriteStartDocumentAsync().ConfigureAwait(false); + await writer.WriteStartElementAsync(null, "entities", null).ConfigureAwait(false); + + foreach (var entity in schema.Entities) + { + cancellationToken.ThrowIfCancellationRequested(); + await WriteEntityAsync(writer, entity).ConfigureAwait(false); + } + + await writer.WriteEndElementAsync().ConfigureAwait(false); // entities + await writer.WriteEndDocumentAsync().ConfigureAwait(false); + await writer.FlushAsync().ConfigureAwait(false); + + _logger?.LogInformation("Wrote schema with {Count} entities", schema.Entities.Count); + } + + private static async Task WriteEntityAsync(XmlWriter 
writer, EntitySchema entity) + { + await writer.WriteStartElementAsync(null, "entity", null).ConfigureAwait(false); + await writer.WriteAttributeStringAsync(null, "name", null, entity.LogicalName).ConfigureAwait(false); + await writer.WriteAttributeStringAsync(null, "displayname", null, entity.DisplayName).ConfigureAwait(false); + await writer.WriteAttributeStringAsync(null, "etc", null, (entity.ObjectTypeCode ?? 0).ToString()).ConfigureAwait(false); + await writer.WriteAttributeStringAsync(null, "primaryidfield", null, entity.PrimaryIdField).ConfigureAwait(false); + await writer.WriteAttributeStringAsync(null, "primarynamefield", null, entity.PrimaryNameField).ConfigureAwait(false); + await writer.WriteAttributeStringAsync(null, "disableplugins", null, entity.DisablePlugins.ToString().ToLowerInvariant()).ConfigureAwait(false); + + // Write fields + await writer.WriteStartElementAsync(null, "fields", null).ConfigureAwait(false); + foreach (var field in entity.Fields) + { + await WriteFieldAsync(writer, field).ConfigureAwait(false); + } + await writer.WriteEndElementAsync().ConfigureAwait(false); // fields + + // Write relationships + if (entity.Relationships.Count > 0) + { + await writer.WriteStartElementAsync(null, "relationships", null).ConfigureAwait(false); + foreach (var rel in entity.Relationships) + { + await WriteRelationshipAsync(writer, rel).ConfigureAwait(false); + } + await writer.WriteEndElementAsync().ConfigureAwait(false); // relationships + } + + // Write filter if present (HTML-encoded) + if (!string.IsNullOrEmpty(entity.FetchXmlFilter)) + { + await writer.WriteElementStringAsync(null, "filter", null, entity.FetchXmlFilter).ConfigureAwait(false); + } + + await writer.WriteEndElementAsync().ConfigureAwait(false); // entity + } + + private static async Task WriteFieldAsync(XmlWriter writer, FieldSchema field) + { + await writer.WriteStartElementAsync(null, "field", null).ConfigureAwait(false); + await writer.WriteAttributeStringAsync(null, 
"displayname", null, field.DisplayName).ConfigureAwait(false); + await writer.WriteAttributeStringAsync(null, "name", null, field.LogicalName).ConfigureAwait(false); + await writer.WriteAttributeStringAsync(null, "type", null, field.Type).ConfigureAwait(false); + + if (field.IsPrimaryKey) + { + await writer.WriteAttributeStringAsync(null, "primaryKey", null, "true").ConfigureAwait(false); + } + + // Write lookupType for lookup fields (may be pipe-delimited for polymorphic) + if (!string.IsNullOrEmpty(field.LookupEntity)) + { + await writer.WriteAttributeStringAsync(null, "lookupType", null, field.LookupEntity).ConfigureAwait(false); + } + + await writer.WriteEndElementAsync().ConfigureAwait(false); // field + } + + private static async Task WriteRelationshipAsync(XmlWriter writer, RelationshipSchema rel) + { + await writer.WriteStartElementAsync(null, "relationship", null).ConfigureAwait(false); + await writer.WriteAttributeStringAsync(null, "name", null, rel.Name).ConfigureAwait(false); + await writer.WriteAttributeStringAsync(null, "manyToMany", null, rel.IsManyToMany.ToString().ToLowerInvariant()).ConfigureAwait(false); + + if (rel.IsManyToMany) + { + // M2M relationship attributes - CMT format + // relatedEntityName = intersect entity (e.g., "systemuserroles") + // m2mTargetEntity = target entity (e.g., "role") + // m2mTargetEntityPrimaryKey = target entity's PK (e.g., "roleid") + await writer.WriteAttributeStringAsync(null, "isreflexive", null, rel.IsReflexive.ToString().ToLowerInvariant()).ConfigureAwait(false); + await writer.WriteAttributeStringAsync(null, "relatedEntityName", null, rel.IntersectEntity ?? rel.Name).ConfigureAwait(false); + await writer.WriteAttributeStringAsync(null, "m2mTargetEntity", null, rel.Entity2).ConfigureAwait(false); + await writer.WriteAttributeStringAsync(null, "m2mTargetEntityPrimaryKey", null, rel.TargetEntityPrimaryKey ?? 
$"{rel.Entity2}id").ConfigureAwait(false); + } + else + { + // One-to-many relationship attributes + await writer.WriteAttributeStringAsync(null, "relatedEntityName", null, rel.Entity2).ConfigureAwait(false); + await writer.WriteAttributeStringAsync(null, "referencingEntity", null, rel.Entity1).ConfigureAwait(false); + await writer.WriteAttributeStringAsync(null, "referencingAttribute", null, rel.Entity1Attribute).ConfigureAwait(false); + await writer.WriteAttributeStringAsync(null, "referencedEntity", null, rel.Entity2).ConfigureAwait(false); + await writer.WriteAttributeStringAsync(null, "referencedAttribute", null, rel.Entity2Attribute).ConfigureAwait(false); + } + + await writer.WriteEndElementAsync().ConfigureAwait(false); // relationship + } + } +} diff --git a/src/PPDS.Migration/Formats/ICmtSchemaWriter.cs b/src/PPDS.Migration/Formats/ICmtSchemaWriter.cs new file mode 100644 index 00000000..e74d0af8 --- /dev/null +++ b/src/PPDS.Migration/Formats/ICmtSchemaWriter.cs @@ -0,0 +1,29 @@ +using System.IO; +using System.Threading; +using System.Threading.Tasks; +using PPDS.Migration.Models; + +namespace PPDS.Migration.Formats +{ + /// + /// Interface for writing CMT-compatible schema files. + /// + public interface ICmtSchemaWriter + { + /// + /// Writes a migration schema to a file. + /// + /// The schema to write. + /// The output file path. + /// Cancellation token. + Task WriteAsync(MigrationSchema schema, string path, CancellationToken cancellationToken = default); + + /// + /// Writes a migration schema to a stream. + /// + /// The schema to write. + /// The output stream. + /// Cancellation token. 
+ Task WriteAsync(MigrationSchema schema, Stream stream, CancellationToken cancellationToken = default); + } +} diff --git a/src/PPDS.Migration/Formats/UserMappingReader.cs b/src/PPDS.Migration/Formats/UserMappingReader.cs new file mode 100644 index 00000000..9206232f --- /dev/null +++ b/src/PPDS.Migration/Formats/UserMappingReader.cs @@ -0,0 +1,143 @@ +using System; +using System.IO; +using System.Threading; +using System.Threading.Tasks; +using System.Xml.Linq; +using Microsoft.Extensions.Logging; +using PPDS.Migration.Models; + +namespace PPDS.Migration.Formats +{ + /// + /// Reads user mapping files. + /// + public class UserMappingReader : IUserMappingReader + { + private readonly ILogger? _logger; + + /// + /// Initializes a new instance of the class. + /// + public UserMappingReader() + { + } + + /// + /// Initializes a new instance of the class. + /// + /// The logger. + public UserMappingReader(ILogger logger) + { + _logger = logger; + } + + /// + public async Task ReadAsync(string path, CancellationToken cancellationToken = default) + { + if (string.IsNullOrEmpty(path)) + { + throw new ArgumentNullException(nameof(path)); + } + + if (!File.Exists(path)) + { + throw new FileNotFoundException($"User mapping file not found: {path}", path); + } + + _logger?.LogInformation("Reading user mappings from {Path}", path); + +#if NET8_0_OR_GREATER + await using var stream = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read, 4096, FileOptions.Asynchronous); +#else + using var stream = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read, 4096, FileOptions.Asynchronous); +#endif + return await ReadAsync(stream, cancellationToken).ConfigureAwait(false); + } + + /// + public async Task ReadAsync(Stream stream, CancellationToken cancellationToken = default) + { + if (stream == null) + { + throw new ArgumentNullException(nameof(stream)); + } + +#if NET8_0_OR_GREATER + var doc = await XDocument.LoadAsync(stream, LoadOptions.None, 
cancellationToken).ConfigureAwait(false); +#else + var doc = XDocument.Load(stream, LoadOptions.None); + await Task.CompletedTask; +#endif + + var collection = ParseMappings(doc); + + _logger?.LogInformation("Loaded {Count} user mappings", collection.Mappings.Count); + + return collection; + } + + private UserMappingCollection ParseMappings(XDocument doc) + { + var root = doc.Root ?? throw new InvalidOperationException("User mapping XML has no root element"); + var collection = new UserMappingCollection(); + + // Parse default settings + var defaultUserAttr = root.Attribute("defaultUserId")?.Value; + if (!string.IsNullOrEmpty(defaultUserAttr) && Guid.TryParse(defaultUserAttr, out var defaultUserId)) + { + collection.DefaultUserId = defaultUserId; + } + + var useCurrentAttr = root.Attribute("useCurrentUserAsDefault")?.Value; + if (!string.IsNullOrEmpty(useCurrentAttr)) + { + collection.UseCurrentUserAsDefault = + useCurrentAttr.Equals("true", StringComparison.OrdinalIgnoreCase) || + useCurrentAttr == "1"; + } + + // Parse mappings + foreach (var mappingElement in root.Elements("mapping")) + { + var sourceIdAttr = mappingElement.Attribute("sourceId")?.Value; + var targetIdAttr = mappingElement.Attribute("targetId")?.Value; + + if (string.IsNullOrEmpty(sourceIdAttr) || !Guid.TryParse(sourceIdAttr, out var sourceId)) + { + continue; + } + + if (string.IsNullOrEmpty(targetIdAttr) || !Guid.TryParse(targetIdAttr, out var targetId)) + { + continue; + } + + collection.Mappings[sourceId] = new UserMapping + { + SourceUserId = sourceId, + SourceUserName = mappingElement.Attribute("sourceName")?.Value, + TargetUserId = targetId, + TargetUserName = mappingElement.Attribute("targetName")?.Value + }; + } + + return collection; + } + } + + /// + /// Interface for reading user mappings. + /// + public interface IUserMappingReader + { + /// + /// Reads user mappings from a file. 
+ /// + Task ReadAsync(string path, CancellationToken cancellationToken = default); + + /// + /// Reads user mappings from a stream. + /// + Task ReadAsync(Stream stream, CancellationToken cancellationToken = default); + } +} diff --git a/src/PPDS.Migration/Import/ImportOptions.cs b/src/PPDS.Migration/Import/ImportOptions.cs index e3eea0ff..a1f96515 100644 --- a/src/PPDS.Migration/Import/ImportOptions.cs +++ b/src/PPDS.Migration/Import/ImportOptions.cs @@ -1,3 +1,5 @@ +using PPDS.Migration.Models; + namespace PPDS.Migration.Import { /// @@ -58,6 +60,18 @@ public class ImportOptions /// Default: 100 /// public int ProgressInterval { get; set; } = 100; + + /// + /// Gets or sets the user mappings for remapping user references. + /// If null, user references are not remapped. + /// + public UserMappingCollection? UserMappings { get; set; } + + /// + /// Gets or sets whether to disable plugins on entities marked with disableplugins=true in schema. + /// Default: true (respects schema setting) + /// + public bool RespectDisablePluginsSetting { get; set; } = true; } /// diff --git a/src/PPDS.Migration/Import/PluginStepManager.cs b/src/PPDS.Migration/Import/PluginStepManager.cs new file mode 100644 index 00000000..0b6a6e35 --- /dev/null +++ b/src/PPDS.Migration/Import/PluginStepManager.cs @@ -0,0 +1,198 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Threading; +using System.Threading.Tasks; +using Microsoft.Extensions.Logging; +using Microsoft.Xrm.Sdk; +using Microsoft.Xrm.Sdk.Query; +using PPDS.Dataverse.Pooling; + +namespace PPDS.Migration.Import +{ + /// + /// Manages plugin step disabling/enabling during import. + /// + public class PluginStepManager : IPluginStepManager + { + private readonly IDataverseConnectionPool _connectionPool; + private readonly ILogger? _logger; + + /// + /// Initializes a new instance of the class. 
+ /// + public PluginStepManager(IDataverseConnectionPool connectionPool) + { + _connectionPool = connectionPool ?? throw new ArgumentNullException(nameof(connectionPool)); + } + + /// + /// Initializes a new instance of the class. + /// + public PluginStepManager(IDataverseConnectionPool connectionPool, ILogger logger) + : this(connectionPool) + { + _logger = logger; + } + + /// + public async Task> GetActivePluginStepsAsync( + IEnumerable entityLogicalNames, + CancellationToken cancellationToken = default) + { + var entityList = entityLogicalNames.ToList(); + if (entityList.Count == 0) + { + return Array.Empty(); + } + + _logger?.LogInformation("Querying active plugin steps for {Count} entities", entityList.Count); + + await using var client = await _connectionPool.GetClientAsync(cancellationToken: cancellationToken) + .ConfigureAwait(false); + + var activeStepIds = new List(); + + // Query sdkmessageprocessingstep for each entity + // We need to join through sdkmessagefilter to find steps by entity + var fetchXml = BuildPluginStepQuery(entityList); + + var response = await client.RetrieveMultipleAsync(new FetchExpression(fetchXml)) + .ConfigureAwait(false); + + foreach (var entity in response.Entities) + { + activeStepIds.Add(entity.Id); + } + + _logger?.LogInformation("Found {Count} active plugin steps", activeStepIds.Count); + + return activeStepIds; + } + + /// + public async Task DisablePluginStepsAsync( + IEnumerable stepIds, + CancellationToken cancellationToken = default) + { + var stepList = stepIds.ToList(); + if (stepList.Count == 0) + { + return; + } + + _logger?.LogInformation("Disabling {Count} plugin steps", stepList.Count); + + await using var client = await _connectionPool.GetClientAsync(cancellationToken: cancellationToken) + .ConfigureAwait(false); + + foreach (var stepId in stepList) + { + cancellationToken.ThrowIfCancellationRequested(); + + var update = new Entity("sdkmessageprocessingstep", stepId) + { + ["statecode"] = new 
OptionSetValue(1), // Disabled + ["statuscode"] = new OptionSetValue(2) // Disabled + }; + + try + { + await client.UpdateAsync(update).ConfigureAwait(false); + } + catch (Exception ex) + { + _logger?.LogWarning(ex, "Failed to disable plugin step {StepId}", stepId); + } + } + } + + /// + public async Task EnablePluginStepsAsync( + IEnumerable stepIds, + CancellationToken cancellationToken = default) + { + var stepList = stepIds.ToList(); + if (stepList.Count == 0) + { + return; + } + + _logger?.LogInformation("Re-enabling {Count} plugin steps", stepList.Count); + + await using var client = await _connectionPool.GetClientAsync(cancellationToken: cancellationToken) + .ConfigureAwait(false); + + foreach (var stepId in stepList) + { + cancellationToken.ThrowIfCancellationRequested(); + + var update = new Entity("sdkmessageprocessingstep", stepId) + { + ["statecode"] = new OptionSetValue(0), // Enabled + ["statuscode"] = new OptionSetValue(1) // Enabled + }; + + try + { + await client.UpdateAsync(update).ConfigureAwait(false); + } + catch (Exception ex) + { + _logger?.LogWarning(ex, "Failed to re-enable plugin step {StepId}", stepId); + } + } + } + + private static string BuildPluginStepQuery(List entityLogicalNames) + { + // Build filter condition for multiple entities + var entityConditions = string.Join("\n", + entityLogicalNames.Select(e => $"")); + + return $@" + + + + + + + + + + + {entityConditions} + + + + "; + } + } + + /// + /// Interface for managing plugin steps during import. + /// + public interface IPluginStepManager + { + /// + /// Gets the IDs of active plugin steps for the specified entities. + /// + Task> GetActivePluginStepsAsync( + IEnumerable entityLogicalNames, + CancellationToken cancellationToken = default); + + /// + /// Disables the specified plugin steps. + /// + Task DisablePluginStepsAsync( + IEnumerable stepIds, + CancellationToken cancellationToken = default); + + /// + /// Re-enables the specified plugin steps. 
+ /// + Task EnablePluginStepsAsync( + IEnumerable stepIds, + CancellationToken cancellationToken = default); + } +} diff --git a/src/PPDS.Migration/Import/TieredImporter.cs b/src/PPDS.Migration/Import/TieredImporter.cs index a1f92a48..e769b16f 100644 --- a/src/PPDS.Migration/Import/TieredImporter.cs +++ b/src/PPDS.Migration/Import/TieredImporter.cs @@ -28,6 +28,7 @@ public class TieredImporter : IImporter private readonly ICmtDataReader _dataReader; private readonly IDependencyGraphBuilder _graphBuilder; private readonly IExecutionPlanBuilder _planBuilder; + private readonly IPluginStepManager? _pluginStepManager; private readonly ILogger? _logger; /// @@ -56,9 +57,11 @@ public TieredImporter( ICmtDataReader dataReader, IDependencyGraphBuilder graphBuilder, IExecutionPlanBuilder planBuilder, - ILogger logger) + IPluginStepManager? pluginStepManager = null, + ILogger? logger = null) : this(connectionPool, bulkExecutor, dataReader, graphBuilder, planBuilder) { + _pluginStepManager = pluginStepManager; _logger = logger; } @@ -110,6 +113,36 @@ public async Task ImportAsync( _logger?.LogInformation("Starting tiered import: {Tiers} tiers, {Records} records", plan.TierCount, data.TotalRecordCount); + // Disable plugins on entities with disableplugins=true + IReadOnlyList disabledPluginSteps = Array.Empty(); + if (options.RespectDisablePluginsSetting && _pluginStepManager != null) + { + var entitiesToDisablePlugins = data.Schema.Entities + .Where(e => e.DisablePlugins) + .Select(e => e.LogicalName) + .ToList(); + + if (entitiesToDisablePlugins.Count > 0) + { + progress?.Report(new ProgressEventArgs + { + Phase = MigrationPhase.Analyzing, + Message = $"Disabling plugins for {entitiesToDisablePlugins.Count} entities..." 
+ }); + + disabledPluginSteps = await _pluginStepManager.GetActivePluginStepsAsync( + entitiesToDisablePlugins, cancellationToken).ConfigureAwait(false); + + if (disabledPluginSteps.Count > 0) + { + await _pluginStepManager.DisablePluginStepsAsync( + disabledPluginSteps, cancellationToken).ConfigureAwait(false); + + _logger?.LogInformation("Disabled {Count} plugin steps", disabledPluginSteps.Count); + } + } + } + try { // Process each tier sequentially @@ -177,7 +210,7 @@ await Parallel.ForEachAsync( // Process M2M relationships var relationshipsProcessed = 0; - if (plan.ManyToManyRelationships.Count > 0) + if (data.RelationshipData.Count > 0) { relationshipsProcessed = await ProcessRelationshipsAsync( data, plan, idMappings, options, progress, cancellationToken).ConfigureAwait(false); @@ -200,13 +233,17 @@ await Parallel.ForEachAsync( Errors = errors.ToArray() }; + // Calculate record-level failure count from entity results + var recordFailureCount = entityResults.Sum(r => r.FailureCount); + progress?.Complete(new MigrationResult { Success = result.Success, - RecordsProcessed = result.RecordsImported + result.RecordsUpdated, - SuccessCount = result.RecordsImported, - FailureCount = errors.Count, - Duration = result.Duration + RecordsProcessed = result.RecordsImported + result.RecordsUpdated + recordFailureCount, + SuccessCount = result.RecordsImported + result.RecordsUpdated, + FailureCount = recordFailureCount, + Duration = result.Duration, + Errors = errors.ToArray() }); return result; @@ -236,6 +273,30 @@ await Parallel.ForEachAsync( } }; } + finally + { + // Re-enable plugins that were disabled + if (disabledPluginSteps.Count > 0 && _pluginStepManager != null) + { + progress?.Report(new ProgressEventArgs + { + Phase = MigrationPhase.Complete, + Message = $"Re-enabling {disabledPluginSteps.Count} plugin steps..." 
+ }); + + try + { + await _pluginStepManager.EnablePluginStepsAsync( + disabledPluginSteps, CancellationToken.None).ConfigureAwait(false); + + _logger?.LogInformation("Re-enabled {Count} plugin steps", disabledPluginSteps.Count); + } + catch (Exception ex) + { + _logger?.LogWarning(ex, "Failed to re-enable some plugin steps"); + } + } + } } private async Task ImportEntityAsync( @@ -261,7 +322,7 @@ private async Task ImportEntityAsync( var preparedRecords = new List(); foreach (var record in records) { - var prepared = PrepareRecordForImport(record, deferredSet, idMappings); + var prepared = PrepareRecordForImport(record, deferredSet, idMappings, options); preparedRecords.Add(prepared); } @@ -296,6 +357,8 @@ private async Task ImportEntityAsync( TierNumber = tierNumber, Current = successCount + failureCount, Total = records.Count, + SuccessCount = successCount, + FailureCount = failureCount, RecordsPerSecond = rps }); } @@ -317,11 +380,20 @@ private async Task ImportEntityAsync( private Entity PrepareRecordForImport( Entity record, HashSet deferredFields, - IdMappingCollection idMappings) + IdMappingCollection idMappings, + ImportOptions options) { var prepared = new Entity(record.LogicalName); prepared.Id = record.Id; // Keep original ID for mapping + // UpsertMultiple requires the primary key as an attribute, not just Entity.Id + // Entity.Id is ignored during creation; must add as attribute for deterministic IDs + if (record.Id != Guid.Empty) + { + var primaryKeyName = $"{record.LogicalName}id"; + prepared[primaryKeyName] = record.Id; + } + foreach (var attr in record.Attributes) { // Skip deferred fields @@ -333,11 +405,12 @@ private Entity PrepareRecordForImport( // Remap entity references if (attr.Value is EntityReference er) { - if (idMappings.TryGetNewId(er.LogicalName, er.Id, out var newId)) + var mappedRef = RemapEntityReference(er, idMappings, options); + if (mappedRef != null) { - prepared[attr.Key] = new EntityReference(er.LogicalName, newId); + 
prepared[attr.Key] = mappedRef; } - // If not mapped yet, keep original (will be processed in deferred phase) + // If null, skip the field (can't be mapped) } else { @@ -348,6 +421,39 @@ private Entity PrepareRecordForImport( return prepared; } + private EntityReference? RemapEntityReference( + EntityReference er, + IdMappingCollection idMappings, + ImportOptions options) + { + // Check if this is a user reference that should use user mapping + if (IsUserReference(er.LogicalName) && options.UserMappings != null) + { + if (options.UserMappings.TryGetMappedUserId(er.Id, out var mappedUserId)) + { + return new EntityReference(er.LogicalName, mappedUserId); + } + // User mapping exists but no mapping found for this user + // Return original if no default, otherwise the default would have been returned + return new EntityReference(er.LogicalName, er.Id); + } + + // Standard ID mapping for non-user references + if (idMappings.TryGetNewId(er.LogicalName, er.Id, out var newId)) + { + return new EntityReference(er.LogicalName, newId); + } + + // Return original - will be processed in deferred phase if needed + return new EntityReference(er.LogicalName, er.Id); + } + + private static bool IsUserReference(string entityLogicalName) + { + return entityLogicalName.Equals("systemuser", StringComparison.OrdinalIgnoreCase) || + entityLogicalName.Equals("team", StringComparison.OrdinalIgnoreCase); + } + private async Task ImportBatchAsync( string entityName, List batch, @@ -365,9 +471,9 @@ private async Task ImportBatchAsync( { var result = options.Mode switch { - ImportMode.Create => await _bulkExecutor.CreateMultipleAsync(entityName, batch, bulkOptions, cancellationToken).ConfigureAwait(false), - ImportMode.Update => await _bulkExecutor.UpdateMultipleAsync(entityName, batch, bulkOptions, cancellationToken).ConfigureAwait(false), - _ => await _bulkExecutor.UpsertMultipleAsync(entityName, batch, bulkOptions, cancellationToken).ConfigureAwait(false) + ImportMode.Create => await 
_bulkExecutor.CreateMultipleAsync(entityName, batch, bulkOptions, progress: null, cancellationToken).ConfigureAwait(false), + ImportMode.Update => await _bulkExecutor.UpdateMultipleAsync(entityName, batch, bulkOptions, progress: null, cancellationToken).ConfigureAwait(false), + _ => await _bulkExecutor.UpsertMultipleAsync(entityName, batch, bulkOptions, progress: null, cancellationToken).ConfigureAwait(false) }; return new BatchImportResult @@ -384,7 +490,7 @@ private async Task ImportBatchAsync( var successCount = 0; var failureCount = 0; - await using var client = await _connectionPool.GetClientAsync(null, cancellationToken).ConfigureAwait(false); + await using var client = await _connectionPool.GetClientAsync(null, cancellationToken: cancellationToken).ConfigureAwait(false); foreach (var record in batch) { @@ -445,12 +551,9 @@ private async Task ProcessDeferredFieldsAsync( continue; } - progress?.Report(new ProgressEventArgs - { - Phase = MigrationPhase.ProcessingDeferredFields, - Entity = entityName, - Message = $"Updating deferred fields: {string.Join(", ", fields)}" - }); + var fieldList = string.Join(", ", fields); + var processed = 0; + var updated = 0; foreach (var record in records) { @@ -458,6 +561,7 @@ private async Task ProcessDeferredFieldsAsync( if (!idMappings.TryGetNewId(entityName, record.Id, out var newId)) { + processed++; continue; } @@ -478,9 +582,27 @@ private async Task ProcessDeferredFieldsAsync( if (hasUpdates) { - await using var client = await _connectionPool.GetClientAsync(null, cancellationToken).ConfigureAwait(false); + await using var client = await _connectionPool.GetClientAsync(null, cancellationToken: cancellationToken).ConfigureAwait(false); await client.UpdateAsync(update).ConfigureAwait(false); totalUpdated++; + updated++; + } + + processed++; + + // Report progress periodically (every 100 records or at completion) + if (processed % 100 == 0 || processed == records.Count) + { + progress?.Report(new ProgressEventArgs + { + Phase 
= MigrationPhase.ProcessingDeferredFields, + Entity = entityName, + Field = fieldList, + Current = processed, + Total = records.Count, + SuccessCount = updated, + Message = $"Updating deferred fields: {fieldList}" + }); } } } @@ -499,50 +621,94 @@ private async Task ProcessRelationshipsAsync( { var totalProcessed = 0; - foreach (var relationship in plan.ManyToManyRelationships) + // Build role name-to-ID cache for role lookup + Dictionary? roleNameCache = null; + + foreach (var (entityName, m2mDataList) in data.RelationshipData) { - if (!data.RelationshipData.TryGetValue(relationship.Name, out var associations)) + foreach (var m2mData in m2mDataList) { - continue; - } + cancellationToken.ThrowIfCancellationRequested(); - progress?.Report(new ProgressEventArgs - { - Phase = MigrationPhase.ProcessingRelationships, - Relationship = relationship.Name, - Total = associations.Count - }); + progress?.Report(new ProgressEventArgs + { + Phase = MigrationPhase.ProcessingRelationships, + Entity = entityName, + Relationship = m2mData.RelationshipName, + Message = $"Processing {m2mData.RelationshipName}..." + }); - foreach (var assoc in associations) - { - cancellationToken.ThrowIfCancellationRequested(); + // Get mapped source ID + if (!idMappings.TryGetNewId(entityName, m2mData.SourceId, out var sourceNewId)) + { + _logger?.LogDebug("Skipping M2M for unmapped source {Entity}:{Id}", + entityName, m2mData.SourceId); + continue; + } + + // Map target IDs - special handling for role entity + var mappedTargetIds = new List(); + var isRoleTarget = m2mData.TargetEntityName.Equals("role", StringComparison.OrdinalIgnoreCase); + + foreach (var targetId in m2mData.TargetIds) + { + Guid? 
mappedId = null; + + // First try direct ID mapping + if (idMappings.TryGetNewId(m2mData.TargetEntityName, targetId, out var directMappedId)) + { + mappedId = directMappedId; + } + // For role entity, try lookup by name + else if (isRoleTarget) + { + roleNameCache ??= await BuildRoleNameCacheAsync(cancellationToken).ConfigureAwait(false); + mappedId = await LookupRoleByIdAsync(targetId, roleNameCache, cancellationToken).ConfigureAwait(false); + } + + if (mappedId.HasValue) + { + mappedTargetIds.Add(mappedId.Value); + } + else + { + _logger?.LogDebug("Could not map target {Entity}:{Id} for relationship {Relationship}", + m2mData.TargetEntityName, targetId, m2mData.RelationshipName); + } + } - if (!idMappings.TryGetNewId(assoc.Entity1LogicalName, assoc.Entity1Id, out var entity1NewId) || - !idMappings.TryGetNewId(assoc.Entity2LogicalName, assoc.Entity2Id, out var entity2NewId)) + if (mappedTargetIds.Count == 0) { continue; } - await using var client = await _connectionPool.GetClientAsync(null, cancellationToken).ConfigureAwait(false); + // Create association request + await using var client = await _connectionPool.GetClientAsync(null, cancellationToken: cancellationToken).ConfigureAwait(false); + + var relatedEntities = new EntityReferenceCollection(); + foreach (var targetId in mappedTargetIds) + { + relatedEntities.Add(new EntityReference(m2mData.TargetEntityName, targetId)); + } var request = new AssociateRequest { - Target = new EntityReference(assoc.Entity1LogicalName, entity1NewId), - RelatedEntities = new EntityReferenceCollection - { - new EntityReference(assoc.Entity2LogicalName, entity2NewId) - }, - Relationship = new Relationship(relationship.Name) + Target = new EntityReference(entityName, sourceNewId), + RelatedEntities = relatedEntities, + Relationship = new Relationship(m2mData.RelationshipName) }; try { await client.ExecuteAsync(request).ConfigureAwait(false); - totalProcessed++; + totalProcessed += mappedTargetIds.Count; } - catch + catch (Exception 
ex) { // M2M associations may fail if already exists - log but continue + _logger?.LogDebug(ex, "Failed to associate {Source} with {TargetCount} targets via {Relationship}", + sourceNewId, mappedTargetIds.Count, m2mData.RelationshipName); + if (!options.ContinueOnError) { throw; @@ -551,10 +717,89 @@ private async Task ProcessRelationshipsAsync( } } - _logger?.LogInformation("Processed {Count} M2M relationships", totalProcessed); + _logger?.LogInformation("Processed {Count} M2M associations", totalProcessed); return totalProcessed; } + private async Task> BuildRoleNameCacheAsync(CancellationToken cancellationToken) + { + var cache = new Dictionary(StringComparer.OrdinalIgnoreCase); + + try + { + await using var client = await _connectionPool.GetClientAsync(null, cancellationToken: cancellationToken).ConfigureAwait(false); + + var fetchXml = @" + + + + + "; + + var response = await client.RetrieveMultipleAsync( + new Microsoft.Xrm.Sdk.Query.FetchExpression(fetchXml)).ConfigureAwait(false); + + foreach (var entity in response.Entities) + { + var name = entity.GetAttributeValue("name"); + var id = entity.Id; + if (!string.IsNullOrEmpty(name) && !cache.ContainsKey(name)) + { + cache[name] = id; + } + } + + _logger?.LogDebug("Built role name cache with {Count} entries", cache.Count); + } + catch (Exception ex) + { + _logger?.LogWarning(ex, "Failed to build role name cache"); + } + + return cache; + } + + private async Task LookupRoleByIdAsync( + Guid sourceRoleId, + Dictionary roleNameCache, + CancellationToken cancellationToken) + { + // First, we need to get the role name from source environment + // Since we only have the source ID, we need to query for it + try + { + await using var client = await _connectionPool.GetClientAsync(null, cancellationToken: cancellationToken).ConfigureAwait(false); + + // Try to retrieve the role by ID - if it exists in target, we can use it directly + var fetchXml = $@" + + + + + + + "; + + var response = await 
client.RetrieveMultipleAsync( + new Microsoft.Xrm.Sdk.Query.FetchExpression(fetchXml)).ConfigureAwait(false); + + if (response.Entities.Count > 0) + { + // Role exists with same ID in target + return sourceRoleId; + } + } + catch + { + // Role doesn't exist with source ID, which is expected + } + + // Role doesn't exist with source ID - this is the common case + // We need to find it by name, but we don't have the source name here + // For now, return null - proper solution requires exporting role names + return null; + } + private class BatchImportResult { public int SuccessCount { get; set; } diff --git a/src/PPDS.Migration/Models/FieldSchema.cs b/src/PPDS.Migration/Models/FieldSchema.cs index 3ea99fa7..c5db88c4 100644 --- a/src/PPDS.Migration/Models/FieldSchema.cs +++ b/src/PPDS.Migration/Models/FieldSchema.cs @@ -37,6 +37,11 @@ public class FieldSchema /// public bool IsRequired { get; set; } + /// + /// Gets or sets whether this field is the primary key. + /// + public bool IsPrimaryKey { get; set; } + /// /// Gets or sets the maximum length for string fields. /// @@ -48,9 +53,10 @@ public class FieldSchema public int? Precision { get; set; } /// - /// Gets whether this field is a lookup type (lookup, customer, owner). + /// Gets whether this field is a lookup type. 
/// - public bool IsLookup => Type.Equals("lookup", StringComparison.OrdinalIgnoreCase) || + public bool IsLookup => Type.Equals("entityreference", StringComparison.OrdinalIgnoreCase) || + Type.Equals("lookup", StringComparison.OrdinalIgnoreCase) || Type.Equals("customer", StringComparison.OrdinalIgnoreCase) || Type.Equals("owner", StringComparison.OrdinalIgnoreCase) || Type.Equals("partylist", StringComparison.OrdinalIgnoreCase); diff --git a/src/PPDS.Migration/Models/MigrationData.cs b/src/PPDS.Migration/Models/MigrationData.cs index 034b1daa..47d929c3 100644 --- a/src/PPDS.Migration/Models/MigrationData.cs +++ b/src/PPDS.Migration/Models/MigrationData.cs @@ -23,10 +23,10 @@ public class MigrationData /// /// Gets or sets the many-to-many relationship data. - /// Key is relationship name, value is list of associations. + /// Key is entity logical name (source entity), value is list of grouped associations. /// - public IReadOnlyDictionary> RelationshipData { get; set; } - = new Dictionary>(); + public IReadOnlyDictionary> RelationshipData { get; set; } + = new Dictionary>(); /// /// Gets or sets the export timestamp. @@ -56,33 +56,39 @@ public int TotalRecordCount } /// - /// Represents a many-to-many association between two records. + /// Represents grouped M2M associations for one source record. + /// Matches CMT data.xml format where each source has a list of targets. /// - public class ManyToManyAssociation + public class ManyToManyRelationshipData { /// - /// Gets or sets the relationship name. + /// Gets or sets the relationship schema name. /// public string RelationshipName { get; set; } = string.Empty; /// - /// Gets or sets the first entity logical name. + /// Gets or sets the source entity logical name. /// - public string Entity1LogicalName { get; set; } = string.Empty; + public string SourceEntityName { get; set; } = string.Empty; /// - /// Gets or sets the first record ID. + /// Gets or sets the source record ID. 
/// - public Guid Entity1Id { get; set; } + public Guid SourceId { get; set; } /// - /// Gets or sets the second entity logical name. + /// Gets or sets the target entity logical name. /// - public string Entity2LogicalName { get; set; } = string.Empty; + public string TargetEntityName { get; set; } = string.Empty; /// - /// Gets or sets the second record ID. + /// Gets or sets the target entity's primary key field name. /// - public Guid Entity2Id { get; set; } + public string TargetEntityPrimaryKey { get; set; } = string.Empty; + + /// + /// Gets or sets the target record IDs. + /// + public List TargetIds { get; set; } = new(); } } diff --git a/src/PPDS.Migration/Models/RelationshipSchema.cs b/src/PPDS.Migration/Models/RelationshipSchema.cs index 104ba518..7f78b7a2 100644 --- a/src/PPDS.Migration/Models/RelationshipSchema.cs +++ b/src/PPDS.Migration/Models/RelationshipSchema.cs @@ -40,6 +40,17 @@ public class RelationshipSchema /// public string? IntersectEntity { get; set; } + /// + /// Gets or sets whether this is a reflexive (self-referential) relationship. + /// + public bool IsReflexive { get; set; } + + /// + /// Gets or sets the target entity's primary key field name (e.g., "roleid"). + /// Required for CMT format compatibility. + /// + public string? TargetEntityPrimaryKey { get; set; } + /// public override string ToString() => IsManyToMany ? $"{Name} (M2M: {Entity1} <-> {Entity2})" diff --git a/src/PPDS.Migration/Models/UserMapping.cs b/src/PPDS.Migration/Models/UserMapping.cs new file mode 100644 index 00000000..d7986406 --- /dev/null +++ b/src/PPDS.Migration/Models/UserMapping.cs @@ -0,0 +1,79 @@ +using System; +using System.Collections.Generic; + +namespace PPDS.Migration.Models +{ + /// + /// Collection of user mappings for migrating user references between environments. + /// + public class UserMappingCollection + { + /// + /// Gets or sets the user mappings. + /// Key is source user ID, value is the mapping. 
+ /// + public Dictionary Mappings { get; set; } = new(); + + /// + /// Gets or sets the default user ID to use when no mapping is found. + /// If null, unmapped users are left as-is. + /// + public Guid? DefaultUserId { get; set; } + + /// + /// Gets or sets whether to use the current user as the default when no mapping is found. + /// Takes precedence over DefaultUserId. + /// + public bool UseCurrentUserAsDefault { get; set; } = true; + + /// + /// Tries to get the mapped user ID for a source user. + /// + /// The source user ID. + /// The mapped target user ID. + /// True if a mapping was found or a default applies. + public bool TryGetMappedUserId(Guid sourceUserId, out Guid targetUserId) + { + if (Mappings.TryGetValue(sourceUserId, out var mapping)) + { + targetUserId = mapping.TargetUserId; + return true; + } + + if (DefaultUserId.HasValue) + { + targetUserId = DefaultUserId.Value; + return true; + } + + targetUserId = Guid.Empty; + return false; + } + } + + /// + /// Represents a mapping from a source user to a target user. + /// + public class UserMapping + { + /// + /// Gets or sets the source user ID. + /// + public Guid SourceUserId { get; set; } + + /// + /// Gets or sets the source user name (for reference/display). + /// + public string? SourceUserName { get; set; } + + /// + /// Gets or sets the target user ID. + /// + public Guid TargetUserId { get; set; } + + /// + /// Gets or sets the target user name (for reference/display). + /// + public string? 
TargetUserName { get; set; } + } +} diff --git a/src/PPDS.Migration/PPDS.Migration.csproj b/src/PPDS.Migration/PPDS.Migration.csproj index aab7368f..df68c42f 100644 --- a/src/PPDS.Migration/PPDS.Migration.csproj +++ b/src/PPDS.Migration/PPDS.Migration.csproj @@ -46,9 +46,10 @@ dependency-aware tiered import, and CMT format compatibility for automated pipel - - - + + + + diff --git a/src/PPDS.Migration/Progress/ConsoleProgressReporter.cs b/src/PPDS.Migration/Progress/ConsoleProgressReporter.cs index b77422b0..59c2e21a 100644 --- a/src/PPDS.Migration/Progress/ConsoleProgressReporter.cs +++ b/src/PPDS.Migration/Progress/ConsoleProgressReporter.cs @@ -1,5 +1,6 @@ using System; using System.Diagnostics; +using System.Linq; namespace PPDS.Migration.Progress { @@ -8,6 +9,8 @@ namespace PPDS.Migration.Progress /// public class ConsoleProgressReporter : IProgressReporter { + private const int MaxErrorsToDisplay = 10; + private readonly Stopwatch _stopwatch = new(); private string? _lastEntity; private int _lastProgress; @@ -41,7 +44,12 @@ public void Report(ProgressEventArgs args) var rps = args.RecordsPerSecond.HasValue ? $" @ {args.RecordsPerSecond:F1} rec/s" : ""; var pct = args.Total > 0 ? $" ({args.PercentComplete:F0}%)" : ""; - Console.WriteLine($"{prefix} [{phase}] {args.Entity}{tierInfo}: {args.Current:N0}/{args.Total:N0}{pct}{rps}"); + // Show success/failure breakdown if there are failures + var failureInfo = args.FailureCount > 0 + ? 
$" [{args.SuccessCount} ok, {args.FailureCount} failed]" + : ""; + + Console.WriteLine($"{prefix} [{phase}] {args.Entity}{tierInfo}: {args.Current:N0}/{args.Total:N0}{pct}{rps}{failureInfo}"); _lastEntity = args.Entity; _lastProgress = args.Current; @@ -49,7 +57,16 @@ public void Report(ProgressEventArgs args) break; case MigrationPhase.ProcessingDeferredFields: - Console.WriteLine($"{prefix} [Deferred] {args.Entity}.{args.Field}: {args.Current:N0}/{args.Total:N0}"); + // Handle cases where Field might be null or empty + if (!string.IsNullOrEmpty(args.Field) && args.Total > 0) + { + var successInfo = args.SuccessCount > 0 ? $" ({args.SuccessCount} updated)" : ""; + Console.WriteLine($"{prefix} [Deferred] {args.Entity}.{args.Field}: {args.Current:N0}/{args.Total:N0}{successInfo}"); + } + else if (!string.IsNullOrEmpty(args.Message)) + { + Console.WriteLine($"{prefix} [Deferred] {args.Entity}: {args.Message}"); + } break; case MigrationPhase.ProcessingRelationships: @@ -71,16 +88,53 @@ public void Complete(MigrationResult result) _stopwatch.Stop(); Console.WriteLine(); Console.WriteLine(new string('=', 60)); - Console.WriteLine(result.Success ? 
"Migration Completed Successfully" : "Migration Completed with Errors"); + + if (result.Success) + { + Console.ForegroundColor = ConsoleColor.Green; + Console.WriteLine("Migration Completed Successfully"); + } + else + { + Console.ForegroundColor = ConsoleColor.Yellow; + Console.WriteLine("Migration Completed with Errors"); + } + Console.ResetColor(); + Console.WriteLine(new string('=', 60)); Console.WriteLine($"Duration: {result.Duration:hh\\:mm\\:ss}"); - Console.WriteLine($"Records: {result.RecordsProcessed:N0}"); - Console.WriteLine($"Throughput: {result.RecordsPerSecond:F1} records/second"); + Console.WriteLine($"Succeeded: {result.SuccessCount:N0}"); if (result.FailureCount > 0) { - Console.WriteLine($"Failures: {result.FailureCount:N0}"); + Console.ForegroundColor = ConsoleColor.Red; + Console.WriteLine($"Failed: {result.FailureCount:N0}"); + Console.ResetColor(); } + + Console.WriteLine($"Throughput: {result.RecordsPerSecond:F1} records/second"); + + // Display error details if available + if (result.Errors?.Count > 0) + { + Console.WriteLine(); + Console.ForegroundColor = ConsoleColor.Red; + Console.WriteLine($"Errors ({result.Errors.Count}):"); + + foreach (var error in result.Errors.Take(MaxErrorsToDisplay)) + { + var entity = !string.IsNullOrEmpty(error.EntityLogicalName) ? $"{error.EntityLogicalName}: " : ""; + var index = error.RecordIndex.HasValue ? $"[{error.RecordIndex}] " : ""; + Console.WriteLine($" - {entity}{index}{error.Message}"); + } + + if (result.Errors.Count > MaxErrorsToDisplay) + { + Console.WriteLine($" ... and {result.Errors.Count - MaxErrorsToDisplay} more errors"); + } + Console.ResetColor(); + } + Console.WriteLine(); } @@ -99,7 +153,7 @@ public void Error(Exception exception, string? 
context = null) private bool ShouldUpdate(int current) { - // Update every 1000 records or 10% progress + // Update every 1000 records or 100 records, whichever comes first return current - _lastProgress >= 1000 || current - _lastProgress >= 100; } } diff --git a/src/PPDS.Migration/Progress/ProgressEventArgs.cs b/src/PPDS.Migration/Progress/ProgressEventArgs.cs index b11cb11c..5aac1ce6 100644 --- a/src/PPDS.Migration/Progress/ProgressEventArgs.cs +++ b/src/PPDS.Migration/Progress/ProgressEventArgs.cs @@ -47,6 +47,16 @@ public class ProgressEventArgs : EventArgs /// public double? RecordsPerSecond { get; set; } + /// + /// Gets or sets the number of records that succeeded in the current batch/phase. + /// + public int SuccessCount { get; set; } + + /// + /// Gets or sets the number of records that failed in the current batch/phase. + /// + public int FailureCount { get; set; } + /// /// Gets or sets a descriptive message. /// diff --git a/src/PPDS.Migration/Schema/DataverseSchemaGenerator.cs b/src/PPDS.Migration/Schema/DataverseSchemaGenerator.cs new file mode 100644 index 00000000..8efc09e9 --- /dev/null +++ b/src/PPDS.Migration/Schema/DataverseSchemaGenerator.cs @@ -0,0 +1,393 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.ServiceModel; +using System.Threading; +using System.Threading.Tasks; +using Microsoft.Extensions.Logging; +using Microsoft.PowerPlatform.Dataverse.Client; +using Microsoft.Xrm.Sdk; +using Microsoft.Xrm.Sdk.Messages; +using Microsoft.Xrm.Sdk.Metadata; +using PPDS.Dataverse.Pooling; +using PPDS.Migration.Models; +using PPDS.Migration.Progress; + +namespace PPDS.Migration.Schema +{ + /// + /// Generates migration schemas from Dataverse metadata. + /// + public class DataverseSchemaGenerator : ISchemaGenerator + { + private readonly IDataverseConnectionPool _connectionPool; + private readonly ILogger? _logger; + + /// + /// Initializes a new instance of the class. + /// + /// The connection pool. 
+ /// Optional logger. + public DataverseSchemaGenerator( + IDataverseConnectionPool connectionPool, + ILogger? logger = null) + { + _connectionPool = connectionPool ?? throw new ArgumentNullException(nameof(connectionPool)); + _logger = logger; + } + + /// + public async Task> GetAvailableEntitiesAsync( + CancellationToken cancellationToken = default) + { + _logger?.LogInformation("Retrieving available entities from Dataverse"); + + await using var client = await _connectionPool.GetClientAsync(cancellationToken: cancellationToken) + .ConfigureAwait(false); + + var request = new RetrieveAllEntitiesRequest + { + EntityFilters = EntityFilters.Entity, + RetrieveAsIfPublished = false + }; + + var response = (RetrieveAllEntitiesResponse)await client.ExecuteAsync(request, cancellationToken) + .ConfigureAwait(false); + + var entities = response.EntityMetadata + .Where(e => e.IsIntersect != true) // Exclude intersect entities + .Select(e => new EntityInfo + { + LogicalName = e.LogicalName, + DisplayName = e.DisplayName?.UserLocalizedLabel?.Label ?? e.LogicalName, + ObjectTypeCode = e.ObjectTypeCode ?? 0, + IsCustomEntity = e.IsCustomEntity ?? false + }) + .OrderBy(e => e.LogicalName) + .ToList(); + + _logger?.LogInformation("Found {Count} entities", entities.Count); + + return entities; + } + + /// + public async Task GenerateAsync( + IEnumerable entityLogicalNames, + SchemaGeneratorOptions? options = null, + IProgressReporter? progress = null, + CancellationToken cancellationToken = default) + { + options ??= new SchemaGeneratorOptions(); + var entityNames = entityLogicalNames.ToList(); + + _logger?.LogInformation("Generating schema for {Count} entities", entityNames.Count); + + progress?.Report(new ProgressEventArgs + { + Phase = MigrationPhase.Analyzing, + Message = $"Generating schema for {entityNames.Count} entities..." 
+ }); + + await using var client = await _connectionPool.GetClientAsync(cancellationToken: cancellationToken) + .ConfigureAwait(false); + + var entitySchemas = new List(); + var entitySet = new HashSet(entityNames, StringComparer.OrdinalIgnoreCase); + + foreach (var entityName in entityNames) + { + cancellationToken.ThrowIfCancellationRequested(); + + progress?.Report(new ProgressEventArgs + { + Phase = MigrationPhase.Analyzing, + Entity = entityName, + Message = $"Retrieving metadata for {entityName}..." + }); + + var entitySchema = await GenerateEntitySchemaAsync( + client, entityName, entitySet, options, cancellationToken).ConfigureAwait(false); + + if (entitySchema != null) + { + entitySchemas.Add(entitySchema); + } + } + + _logger?.LogInformation("Generated schema with {Count} entities", entitySchemas.Count); + + return new MigrationSchema + { + Version = "1.0", + GeneratedAt = DateTime.UtcNow, + Entities = entitySchemas + }; + } + + private async Task GenerateEntitySchemaAsync( + IOrganizationServiceAsync2 client, + string entityName, + HashSet includedEntities, + SchemaGeneratorOptions options, + CancellationToken cancellationToken) + { + _logger?.LogDebug("Retrieving metadata for entity {Entity}", entityName); + + var request = new RetrieveEntityRequest + { + LogicalName = entityName, + EntityFilters = EntityFilters.Attributes | EntityFilters.Relationships, + RetrieveAsIfPublished = false + }; + + RetrieveEntityResponse response; + try + { + response = (RetrieveEntityResponse)await client.ExecuteAsync(request, cancellationToken) + .ConfigureAwait(false); + } + catch (FaultException ex) + { + _logger?.LogWarning(ex, "Dataverse fault retrieving metadata for entity {Entity}: {Message}", + entityName, ex.Message); + return null; + } + catch (TimeoutException ex) + { + _logger?.LogWarning(ex, "Timeout retrieving metadata for entity {Entity}", entityName); + return null; + } + catch (Exception ex) when (ex is System.Net.Http.HttpRequestException or 
OperationCanceledException) + { + _logger?.LogWarning(ex, "Network error retrieving metadata for entity {Entity}: {Message}", + entityName, ex.Message); + return null; + } + + var metadata = response.EntityMetadata; + + // Generate fields + var fields = GenerateFields(metadata, options); + + // Generate relationships + var relationships = options.IncludeRelationships + ? GenerateRelationships(metadata, includedEntities) + : Array.Empty(); + + return new EntitySchema + { + LogicalName = metadata.LogicalName, + DisplayName = metadata.DisplayName?.UserLocalizedLabel?.Label ?? metadata.LogicalName, + ObjectTypeCode = metadata.ObjectTypeCode, + PrimaryIdField = metadata.PrimaryIdAttribute ?? $"{metadata.LogicalName}id", + PrimaryNameField = metadata.PrimaryNameAttribute ?? "name", + DisablePlugins = options.DisablePluginsByDefault, + Fields = fields.ToList(), + Relationships = relationships.ToList() + }; + } + + private IEnumerable GenerateFields(EntityMetadata metadata, SchemaGeneratorOptions options) + { + if (metadata.Attributes == null) + { + yield break; + } + + foreach (var attr in metadata.Attributes) + { + // Skip if not valid for read + if (attr.IsValidForRead != true) + { + continue; + } + + var isPrimaryKey = attr.LogicalName == metadata.PrimaryIdAttribute; + + // Apply attribute filtering (primary key is always included) + if (!options.ShouldIncludeAttribute(attr.LogicalName, isPrimaryKey)) + { + continue; + } + + // Skip system fields unless requested + if (!options.IncludeSystemFields && IsSystemField(attr.LogicalName)) + { + continue; + } + + // Skip non-custom fields if only custom requested + if (options.CustomFieldsOnly && attr.IsCustomAttribute != true) + { + continue; + } + + var fieldType = GetFieldType(attr); + var lookupTargets = GetLookupTargets(attr); + + yield return new FieldSchema + { + LogicalName = attr.LogicalName, + DisplayName = attr.DisplayName?.UserLocalizedLabel?.Label ?? 
attr.LogicalName, + Type = fieldType, + LookupEntity = lookupTargets, + IsCustomField = attr.IsCustomAttribute ?? false, + IsRequired = attr.RequiredLevel?.Value == AttributeRequiredLevel.ApplicationRequired || + attr.RequiredLevel?.Value == AttributeRequiredLevel.SystemRequired, + IsPrimaryKey = isPrimaryKey + }; + } + } + + private static string GetFieldType(AttributeMetadata attr) + { + return attr.AttributeType switch + { + AttributeTypeCode.BigInt => "bigint", + AttributeTypeCode.Boolean => "boolean", + AttributeTypeCode.CalendarRules => "calendarrules", + AttributeTypeCode.Customer => "entityreference", + AttributeTypeCode.DateTime => "datetime", + AttributeTypeCode.Decimal => "decimal", + AttributeTypeCode.Double => "float", + AttributeTypeCode.Integer => "integer", + AttributeTypeCode.Lookup => "entityreference", + AttributeTypeCode.Memo => "memo", + AttributeTypeCode.Money => "money", + AttributeTypeCode.Owner => "owner", + AttributeTypeCode.PartyList => "partylist", + AttributeTypeCode.Picklist => "picklist", + AttributeTypeCode.State => "state", + AttributeTypeCode.Status => "status", + AttributeTypeCode.String => "string", + AttributeTypeCode.Uniqueidentifier => "guid", + AttributeTypeCode.Virtual => "virtual", + AttributeTypeCode.ManagedProperty => "managedproperty", + AttributeTypeCode.EntityName => "entityname", + _ => "string" + }; + } + + private static string? 
GetLookupTargets(AttributeMetadata attr) + { + if (attr is not LookupAttributeMetadata lookupAttr) + { + return null; + } + + var targets = lookupAttr.Targets; + if (targets == null || targets.Length == 0) + { + return "*"; // Unknown/unbounded lookup + } + + // CMT format: pipe-delimited for polymorphic lookups + return string.Join("|", targets); + } + + private IEnumerable GenerateRelationships( + EntityMetadata metadata, + HashSet includedEntities) + { + // One-to-Many relationships (where this entity is referenced) + if (metadata.OneToManyRelationships != null) + { + foreach (var rel in metadata.OneToManyRelationships) + { + // Only include if related entity is in our set + if (!includedEntities.Contains(rel.ReferencingEntity)) + { + continue; + } + + yield return new RelationshipSchema + { + Name = rel.SchemaName, + IsManyToMany = false, + Entity1 = rel.ReferencingEntity, + Entity1Attribute = rel.ReferencingAttribute, + Entity2 = rel.ReferencedEntity, + Entity2Attribute = rel.ReferencedAttribute + }; + } + } + + // Many-to-Many relationships + if (metadata.ManyToManyRelationships != null) + { + foreach (var rel in metadata.ManyToManyRelationships) + { + // Determine the "other" entity and correct attributes + // The relationship must be relative to the current entity (source) + string sourceEntity, targetEntity, sourceAttribute, targetAttribute; + if (rel.Entity1LogicalName.Equals(metadata.LogicalName, StringComparison.OrdinalIgnoreCase)) + { + sourceEntity = rel.Entity1LogicalName; + sourceAttribute = rel.Entity1IntersectAttribute; + targetEntity = rel.Entity2LogicalName; + targetAttribute = rel.Entity2IntersectAttribute; + } + else + { + sourceEntity = rel.Entity2LogicalName; + sourceAttribute = rel.Entity2IntersectAttribute; + targetEntity = rel.Entity1LogicalName; + targetAttribute = rel.Entity1IntersectAttribute; + } + + // Only include if target entity is in our set + if (!includedEntities.Contains(targetEntity)) + { + continue; + } + + // Only emit from 
one side to avoid duplicates + if (string.Compare(metadata.LogicalName, targetEntity, StringComparison.OrdinalIgnoreCase) > 0) + { + continue; + } + + // Check for reflexive relationship (self-referencing M2M) + var isReflexive = sourceEntity.Equals(targetEntity, StringComparison.OrdinalIgnoreCase); + + yield return new RelationshipSchema + { + Name = rel.SchemaName, + IsManyToMany = true, + IsReflexive = isReflexive, + Entity1 = sourceEntity, + Entity1Attribute = sourceAttribute, + Entity2 = targetEntity, + Entity2Attribute = targetAttribute, + IntersectEntity = rel.IntersectEntityName, + TargetEntityPrimaryKey = targetAttribute + }; + } + } + } + + private static bool IsSystemField(string fieldName) + { + // Common system fields that are usually not migrated + return fieldName switch + { + "createdon" => true, + "createdby" => true, + "createdonbehalfby" => true, + "modifiedon" => true, + "modifiedby" => true, + "modifiedonbehalfby" => true, + "versionnumber" => true, + "timezoneruleversionnumber" => true, + "utcconversiontimezonecode" => true, + "overriddencreatedon" => true, + "importsequencenumber" => true, + "owningbusinessunit" => true, + "owningteam" => true, + "owninguser" => true, + _ => false + }; + } + } +} diff --git a/src/PPDS.Migration/Schema/ISchemaGenerator.cs b/src/PPDS.Migration/Schema/ISchemaGenerator.cs new file mode 100644 index 00000000..1e0b870e --- /dev/null +++ b/src/PPDS.Migration/Schema/ISchemaGenerator.cs @@ -0,0 +1,62 @@ +using System.Collections.Generic; +using System.Threading; +using System.Threading.Tasks; +using PPDS.Migration.Models; +using PPDS.Migration.Progress; + +namespace PPDS.Migration.Schema +{ + /// + /// Interface for generating migration schemas from Dataverse metadata. + /// + public interface ISchemaGenerator + { + /// + /// Generates a migration schema for the specified entities. + /// + /// The logical names of entities to include. + /// Schema generation options. + /// Optional progress reporter. 
+ /// Cancellation token. + /// The generated migration schema. + Task GenerateAsync( + IEnumerable entityLogicalNames, + SchemaGeneratorOptions? options = null, + IProgressReporter? progress = null, + CancellationToken cancellationToken = default); + + /// + /// Gets available entities from Dataverse. + /// + /// Cancellation token. + /// List of entity logical names and display names. + Task> GetAvailableEntitiesAsync( + CancellationToken cancellationToken = default); + } + + /// + /// Basic entity information. + /// + public class EntityInfo + { + /// + /// Gets or sets the entity logical name. + /// + public string LogicalName { get; set; } = string.Empty; + + /// + /// Gets or sets the entity display name. + /// + public string DisplayName { get; set; } = string.Empty; + + /// + /// Gets or sets the entity type code. + /// + public int ObjectTypeCode { get; set; } + + /// + /// Gets or sets whether this is a custom entity. + /// + public bool IsCustomEntity { get; set; } + } +} diff --git a/src/PPDS.Migration/Schema/SchemaGeneratorOptions.cs b/src/PPDS.Migration/Schema/SchemaGeneratorOptions.cs new file mode 100644 index 00000000..b305281b --- /dev/null +++ b/src/PPDS.Migration/Schema/SchemaGeneratorOptions.cs @@ -0,0 +1,98 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text.RegularExpressions; + +namespace PPDS.Migration.Schema +{ + /// + /// Options for schema generation. + /// + public class SchemaGeneratorOptions + { + /// + /// Gets or sets whether to include all fields. Default: true. + /// + public bool IncludeAllFields { get; set; } = true; + + /// + /// Gets or sets whether to include system fields (createdon, modifiedon, etc.). Default: false. + /// + public bool IncludeSystemFields { get; set; } = false; + + /// + /// Gets or sets whether to include relationships. Default: true. + /// + public bool IncludeRelationships { get; set; } = true; + + /// + /// Gets or sets whether to include only custom fields. 
Default: false. + /// + public bool CustomFieldsOnly { get; set; } = false; + + /// + /// Gets or sets the default value for disabling plugins during import. Default: false. + /// + public bool DisablePluginsByDefault { get; set; } = false; + + /// + /// Gets or sets the attributes to include (whitelist). If set, only these attributes are included. + /// Takes precedence over ExcludeAttributes. Primary key is always included. + /// + public IReadOnlyList? IncludeAttributes { get; set; } + + /// + /// Gets or sets the attributes to exclude (blacklist). If set, these attributes are excluded. + /// Ignored if IncludeAttributes is set. + /// + public IReadOnlyList? ExcludeAttributes { get; set; } + + /// + /// Gets or sets attribute name patterns to exclude (e.g., "new_*", "*_base"). + /// Uses glob-style wildcards (* matches any characters). + /// + public IReadOnlyList? ExcludeAttributePatterns { get; set; } + + /// + /// Determines if an attribute should be included based on the filtering options. + /// + /// The attribute logical name. + /// Whether this attribute is the primary key. + /// True if the attribute should be included. 
+ public bool ShouldIncludeAttribute(string attributeName, bool isPrimaryKey) + { + // Primary key is always included + if (isPrimaryKey) + { + return true; + } + + // Whitelist mode: only include specified attributes + if (IncludeAttributes != null && IncludeAttributes.Count > 0) + { + return IncludeAttributes.Any(attr => attr.Equals(attributeName, StringComparison.OrdinalIgnoreCase)); + } + + // Blacklist mode: exclude specified attributes + if (ExcludeAttributes?.Any(attr => attr.Equals(attributeName, StringComparison.OrdinalIgnoreCase)) == true) + { + return false; + } + + // Pattern exclusion + if (ExcludeAttributePatterns?.Any(pattern => MatchesPattern(attributeName, pattern)) == true) + { + return false; + } + + return true; + } + + private static bool MatchesPattern(string value, string pattern) + { + // Convert glob pattern to regex + var regexPattern = "^" + Regex.Escape(pattern).Replace("\\*", ".*") + "$"; + return Regex.IsMatch(value, regexPattern, RegexOptions.IgnoreCase); + } + } +} diff --git a/tests/PPDS.Dataverse.Tests/PPDS.Dataverse.Tests.csproj b/tests/PPDS.Dataverse.Tests/PPDS.Dataverse.Tests.csproj index fb6d5166..c3ea154d 100644 --- a/tests/PPDS.Dataverse.Tests/PPDS.Dataverse.Tests.csproj +++ b/tests/PPDS.Dataverse.Tests/PPDS.Dataverse.Tests.csproj @@ -16,13 +16,13 @@ all runtime; build; native; contentfiles; analyzers; buildtransitive - + all runtime; build; native; contentfiles; analyzers; buildtransitive - - + + diff --git a/tests/PPDS.Migration.Cli.Tests/PPDS.Migration.Cli.Tests.csproj b/tests/PPDS.Migration.Cli.Tests/PPDS.Migration.Cli.Tests.csproj index b3fc5cf0..84fe0e28 100644 --- a/tests/PPDS.Migration.Cli.Tests/PPDS.Migration.Cli.Tests.csproj +++ b/tests/PPDS.Migration.Cli.Tests/PPDS.Migration.Cli.Tests.csproj @@ -16,7 +16,7 @@ all runtime; build; native; contentfiles; analyzers; buildtransitive - + all runtime; build; native; contentfiles; analyzers; buildtransitive diff --git a/tests/PPDS.Plugins.Tests/PPDS.Plugins.Tests.csproj 
b/tests/PPDS.Plugins.Tests/PPDS.Plugins.Tests.csproj index 4a33b460..c27ee3dc 100644 --- a/tests/PPDS.Plugins.Tests/PPDS.Plugins.Tests.csproj +++ b/tests/PPDS.Plugins.Tests/PPDS.Plugins.Tests.csproj @@ -16,7 +16,7 @@ all runtime; build; native; contentfiles; analyzers; buildtransitive - + all runtime; build; native; contentfiles; analyzers; buildtransitive