diff --git a/docs/guides/field-level-delete-before-replace.md b/docs/guides/field-level-delete-before-replace.md new file mode 100644 index 000000000..199cdb524 --- /dev/null +++ b/docs/guides/field-level-delete-before-replace.md @@ -0,0 +1,628 @@ +# Field-Level Delete Before Replace Configuration + +This guide explains how to configure field-level delete-before-replace predicates in your bridged Terraform provider. + +## Overview + +The Pulumi Terraform Bridge supports field-level conditional logic that can recommend or automatically apply `deleteBeforeReplace` based on resource state. This uses a predicate-based system to make intelligent decisions about when delete-before-replace is needed. + +This feature is particularly useful for: +- Network configuration changes that conflict with attached interfaces +- Instance type changes that conflict with attached storage +- Conditional logic based on resource state (attachments, instance types, etc.) +- Any field change where conflicts depend on other resource properties + +## Predicate-Based Configuration + +### Basic Predicate Setup + +Configure conditional delete-before-replace using predicates in your provider's `ResourceInfo`: + +```go +package main + +import ( + "github.com/pulumi/pulumi-terraform-bridge/v3/pkg/tfbridge" + "github.com/pulumi/pulumi-terraform-bridge/v3/pkg/tfbridge/info" +) + +func Provider() *tfbridge.ProviderInfo { + return &tfbridge.ProviderInfo{ + // ... other provider configuration ... + + Resources: map[string]*tfbridge.ResourceInfo{ + "aws_instance": { + Tok: "aws:ec2/instance:Instance", + Fields: map[string]*tfbridge.SchemaInfo{ + "instance_type": { + // This field already forces replacement in Terraform + ForceNew: &[]bool{true}[0], + + // NEW: Predicate-based delete-before-replace logic + DeleteBeforeReplaceFunc: &tfbridge.DeleteBeforeReplaceEvaluator{ + Predicates: []tfbridge.DeleteBeforeReplacePredicate{ + { + Type: "fieldChanged", + Config: map[string]interface{}{ + "field": "instance_type", + }, + Description: "Instance type is changing", + }, + { + Type: "hasAttachedResources", + Config: map[string]interface{}{ + "resourceTypes": []string{"networkInterface", "ebsVolume"}, + "minimumCount": 1, + }, + Description: "Instance has attached resources that may conflict", + }, + }, + Logic: tfbridge.PredicateLogic{Op: "AND"}, + Behavior: tfbridge.WarnUser, + Reasoning: "Instance type changes may conflict with attached EBS volumes and network interfaces", + }, + }, + }, + }, + }, + } +} +``` + +### Predicate Configuration Structure + +```go +type DeleteBeforeReplaceEvaluator struct { + Predicates []DeleteBeforeReplacePredicate // List of conditions to evaluate + Logic PredicateLogic // How to combine predicates (AND, OR, NOT) + Behavior DeleteBeforeReplaceBehavior // Warn vs Auto-apply + Reasoning string // Human-readable explanation +} + +type DeleteBeforeReplacePredicate struct { + Type string // Predicate type: "fieldChanged", "hasAttachedResources", etc. + Config map[string]interface{} // Predicate-specific configuration + Description string // Human-readable description for debugging +} + +type PredicateLogic struct { + Op string // "AND", "OR", "NOT" + Expr string // Complex expressions like "(A AND B) OR C" +} + +type DeleteBeforeReplaceBehavior int +const ( + WarnUser DeleteBeforeReplaceBehavior = iota // Show warning to user + AutoApply // Automatically apply (experimental) +) +``` + +## Available Predicate Types + +### FieldChanged Predicate + +Detects when a specific field has changed between old and new resource state. + +```go +{ + Type: "fieldChanged", + Config: map[string]interface{}{ + "field": "instance_type", // Field path (supports nested: "config.network.private") + "ignoreOrder": false, // For arrays/lists, ignore order changes + "threshold": 0, // Minimum change threshold for numeric fields + }, + Description: "Instance type field is changing", +} +``` + +### HasAttachedResources Predicate + +Checks for attached or dependent resources that might conflict during replacement. + +```go +{ + Type: "hasAttachedResources", + Config: map[string]interface{}{ + "resourceTypes": []string{"networkInterface", "disk"}, // Types to check for + "checkConflicts": true, // Whether to verify potential conflicts + "minimumCount": 1, // Minimum number that triggers condition + "maximumCount": 10, // Maximum number that triggers condition + }, + Description: "Check for network interfaces that cannot be shared", +} +``` + +### FieldEquals Predicate + +Checks if a field equals specific values. + +```go +{ + Type: "fieldEquals", + Config: map[string]interface{}{ + "field": "type", // Field to check + "values": []string{"g6-standard-1", "g6-standard-2"}, // Values that trigger the condition + }, + Description: "Instance type requires special handling", +} +``` + +### FieldMatches Predicate + +Checks if a field matches a regex pattern. + +```go +{ + Type: "fieldMatches", + Config: map[string]interface{}{ + "field": "machine_type", // Field to check + "pattern": "^n1-standard-.*", // Regex pattern + }, + Description: "N1 standard instance types need special handling", +} +``` + +### ResourceCount Predicate + +Checks the count of related resources. + +```go +{ + Type: "resourceCount", + Config: map[string]interface{}{ + "resourceType": "attachedDisk", // Type of resource to count + "operator": ">=", // Comparison operator: >=, <=, ==, != + "count": 2, // Count to compare against + }, + Description: "Instance has multiple attached disks", +} +``` + +## Field Selection Guidelines + +### When to Add Warnings + +Add `WarnDeleteBeforeReplace` configuration to fields that: + +1. **Already trigger replacement** (`ForceNew: true` in Terraform schema) +2. **Have attachment conflicts** (network interfaces, storage, etc.) +3. **Commonly cause replacement failures** in real-world usage +4. **Would benefit from delete-before-replace** strategy + +### When NOT to Add Warnings + +Don't add warnings for fields that: + +1. **Don't trigger replacement** (in-place updates only) +2. **Rarely cause conflicts** (simple configuration changes) +3. **Work reliably with create-before-replace** (the default strategy) +4. **Have very fast replacement times** (where delete-before-replace isn't helpful) + +## Provider-Specific Examples + +### Google Cloud Platform (GCP) + +```go +"google_compute_instance": { + Tok: "gcp:compute/instance:Instance", + Fields: map[string]*tfbridge.SchemaInfo{ + "enable_private_nodes": { + ForceNew: &[]bool{true}[0], + DeleteBeforeReplaceFunc: &tfbridge.DeleteBeforeReplaceEvaluator{ + Predicates: []tfbridge.DeleteBeforeReplacePredicate{ + { + Type: "fieldChanged", + Config: map[string]interface{}{ + "field": "enable_private_nodes", + }, + Description: "Private nodes setting is changing", + }, + { + Type: "hasAttachedResources", + Config: map[string]interface{}{ + "resourceTypes": []string{"networkInterface", "attachedDisk"}, + "checkConflicts": true, + "minimumCount": 1, + }, + Description: "Check for network interfaces that cannot be shared", + }, + }, + Logic: tfbridge.PredicateLogic{Op: "AND"}, + Behavior: tfbridge.WarnUser, + Reasoning: "Private node changes may conflict with attached network interfaces", + }, + }, + "machine_type": { + ForceNew: &[]bool{true}[0], + DeleteBeforeReplaceFunc: &tfbridge.DeleteBeforeReplaceEvaluator{ + Predicates: []tfbridge.DeleteBeforeReplacePredicate{ + { + Type: "fieldChanged", + Config: map[string]interface{}{ + "field": "machine_type", + }, + Description: "Machine type is changing", + }, + { + Type: "hasAttachedResources", + Config: map[string]interface{}{ + "resourceTypes": []string{"persistentDisk", "networkInterface"}, + "minimumCount": 1, + }, + Description: "Instance has attached persistent resources", + }, + }, + Logic: tfbridge.PredicateLogic{Op: "AND"}, + Behavior: tfbridge.WarnUser, + Reasoning: "Machine type changes may conflict with persistent disks or network interfaces", + }, + }, + }, +}, +``` + +### AWS + +```go +"aws_instance": { + Tok: "aws:ec2/instance:Instance", + Fields: map[string]*tfbridge.SchemaInfo{ + "instance_type": { + ForceNew: &[]bool{true}[0], + DeleteBeforeReplaceFunc: &tfbridge.DeleteBeforeReplaceEvaluator{ + Predicates: []tfbridge.DeleteBeforeReplacePredicate{ + { + Type: "fieldChanged", + Config: map[string]interface{}{ + "field": "instance_type", + }, + Description: "Instance type is changing", + }, + { + Type: "hasAttachedResources", + Config: map[string]interface{}{ + "resourceTypes": []string{"ebsVolume", "networkInterface"}, + "minimumCount": 1, + }, + Description: "Instance has attached EBS volumes or network interfaces", + }, + }, + Logic: tfbridge.PredicateLogic{Op: "AND"}, + Behavior: tfbridge.WarnUser, + Reasoning: "Instance type changes may conflict with attached EBS volumes and network interfaces", + }, + }, + "subnet_id": { + ForceNew: &[]bool{true}[0], + DeleteBeforeReplaceFunc: &tfbridge.DeleteBeforeReplaceEvaluator{ + Predicates: []tfbridge.DeleteBeforeReplacePredicate{ + { + Type: "fieldChanged", + Config: map[string]interface{}{ + "field": "subnet_id", + }, + Description: "Subnet ID is changing", + }, + }, + Logic: tfbridge.PredicateLogic{Op: "AND"}, + Behavior: tfbridge.WarnUser, + Reasoning: "Subnet changes require instance replacement and may conflict with network interface attachments", + }, + }, + }, +}, +``` + +### Linode + +```go +"linode_instance": { + Tok: "linode:index/instance:Instance", + Fields: map[string]*tfbridge.SchemaInfo{ + "root_pass": { + ForceNew: &[]bool{true}[0], + DeleteBeforeReplaceFunc: &tfbridge.DeleteBeforeReplaceEvaluator{ + Predicates: []tfbridge.DeleteBeforeReplacePredicate{ + { + Type: "fieldChanged", + Config: map[string]interface{}{ + "field": "root_pass", + }, + Description: "Root password is changing", + }, + { + Type: "fieldEquals", + Config: map[string]interface{}{ + "field": "type", + "values": []string{"g6-standard-1", "g6-standard-2", "g6-standard-4"}, + }, + Description: "Instance type requires special password handling", + }, + }, + Logic: tfbridge.PredicateLogic{Op: "AND"}, + Behavior: tfbridge.WarnUser, + Reasoning: "Root password changes on these instance types require delete-before-replace to avoid configuration conflicts", + }, + }, + "type": { + ForceNew: &[]bool{true}[0], + DeleteBeforeReplaceFunc: &tfbridge.DeleteBeforeReplaceEvaluator{ + Predicates: []tfbridge.DeleteBeforeReplacePredicate{ + { + Type: "fieldChanged", + Config: map[string]interface{}{ + "field": "type", + }, + Description: "Instance type is changing", + }, + { + Type: "hasAttachedResources", + Config: map[string]interface{}{ + "resourceTypes": []string{"volume", "nodebalancer"}, + "minimumCount": 1, + }, + Description: "Instance has attached volumes or load balancer configurations", + }, + }, + Logic: tfbridge.PredicateLogic{Op: "AND"}, + Behavior: tfbridge.WarnUser, + Reasoning: "Instance type changes may conflict with attached volumes or NodeBalancer configurations", + }, + }, + "label": { + // Example: Label changes that might affect DNS or monitoring + DeleteBeforeReplaceFunc: &tfbridge.DeleteBeforeReplaceEvaluator{ + Predicates: []tfbridge.DeleteBeforeReplacePredicate{ + { + Type: "fieldChanged", + Config: map[string]interface{}{ + "field": "label", + }, + Description: "Instance label is changing", + }, + { + Type: "fieldMatches", + Config: map[string]interface{}{ + "field": "label", + "pattern": "^prod-.*", + }, + Description: "Production instance labels have special requirements", + }, + }, + Logic: tfbridge.PredicateLogic{Op: "AND"}, + Behavior: tfbridge.WarnUser, + Reasoning: "Label changes on production instances should use delete-before-replace to ensure DNS and monitoring consistency", + }, + }, + }, +}, +``` + +### Azure + +```go +"azurerm_virtual_machine": { + Tok: "azure:compute/virtualMachine:VirtualMachine", + Fields: map[string]*tfbridge.SchemaInfo{ + "vm_size": { + ForceNew: &[]bool{true}[0], + WarnDeleteBeforeReplace: &tfbridge.FieldDeleteBeforeReplaceWarning{ + Enabled: true, + Reason: "VM size changes require replacement and may conflict with attached managed disks or network interfaces", + }, + }, + "network_interface_ids": { + ForceNew: &[]bool{true}[0], + WarnDeleteBeforeReplace: &tfbridge.FieldDeleteBeforeReplaceWarning{ + Enabled: true, + Reason: "Network interface changes may conflict with subnet assignments and IP allocations", + }, + }, + }, +}, +``` + +## Writing Effective Warning Messages + +### Good Warning Reasons + +```go +// ✅ Good: Specific and actionable +Reason: "Instance type changes may conflict with attached EBS volumes that cannot be moved between instance types" + +// ✅ Good: Explains the technical cause +Reason: "Network interface changes require delete-before-replace because interfaces cannot be attached to multiple instances simultaneously" + +// ✅ Good: Mentions specific resource types +Reason: "Machine type changes may conflict with persistent disks, network interfaces, or GPU attachments" +``` + +### Poor Warning Reasons + +```go +// ❌ Poor: Too vague +Reason: "This field might cause problems" + +// ❌ Poor: No explanation of why +Reason: "Use delete-before-replace for this field" + +// ❌ Poor: Technical jargon without context +Reason: "ForceNew attribute may cause resource conflict" +``` + +### Warning Message Guidelines + +1. **Be specific** about what resources might conflict +2. **Explain why** the conflict occurs (technical reason) +3. **Mention specific attachment types** (disks, interfaces, etc.) +4. **Keep it under 100 characters** for readability +5. **Use present tense** ("changes may conflict" not "changes will conflict") + +## Testing Your Configuration + +### Unit Tests + +Test that your warning configuration works correctly: + +```go +func TestDeleteBeforeReplaceWarnings(t *testing.T) { + provider := &tfbridge.Provider{ + // ... provider setup ... + } + + // Test that warnings appear when configured fields change + olds := resource.PropertyMap{ + "instance_type": resource.NewStringProperty("t2.micro"), + } + news := resource.PropertyMap{ + "instance_type": resource.NewStringProperty("t2.small"), + } + + res := &tfbridge.ResourceInfo{ + Fields: map[string]*tfbridge.SchemaInfo{ + "instance_type": { + ForceNew: &[]bool{true}[0], + WarnDeleteBeforeReplace: &tfbridge.FieldDeleteBeforeReplaceWarning{ + Enabled: true, + Reason: "Instance type changes may conflict with attached resources", + }, + }, + }, + } + + warnings := provider.CheckFieldDeleteBeforeReplaceWarnings(olds, news, res) + + assert.Len(t, warnings, 1) + assert.Equal(t, "instance_type", warnings[0].Field) + assert.Contains(t, warnings[0].Reason, "may conflict") +} +``` + +### Integration Tests + +Test the full user experience: + +```go +func TestInstanceTypeChangeShowsWarning(t *testing.T) { + // Create a test program that changes instance_type + program := ` +name: test +runtime: yaml +resources: + instance: + type: aws:ec2/instance:Instance + properties: + instanceType: t2.small # Changed from t2.micro + ami: ami-12345 +` + + // Run preview and check for warnings + result := integration.RunPulumiPreview(t, program) + assert.Contains(t, result.Output, "may require deleteBeforeReplace") + assert.Contains(t, result.Output, "Instance type changes may conflict") +} +``` + +## Best Practices + +### Configuration Best Practices + +1. **Start conservatively** - Only add warnings for fields with known conflict patterns +2. **Monitor user feedback** - Add warnings based on actual user support requests +3. **Update based on cloud provider changes** - Cloud APIs evolve and conflict patterns change +4. **Document in provider README** - List which fields have delete-before-replace warnings + +### Message Best Practices + +1. **Test messages with real users** - Ensure they're helpful and clear +2. **Keep messages actionable** - Users should know what to do next +3. **Link to documentation** - Reference provider-specific guidance when possible +4. **Update messages based on feedback** - Iterate on clarity and usefulness + +### Provider Maintenance + +1. **Review warnings quarterly** - Remove warnings that are no longer needed +2. **Add warnings for new resources** - New cloud services may have new conflict patterns +3. **Monitor cloud provider documentation** - Changes in replacement behavior should update warnings +4. **Track user sentiment** - Are the warnings helpful or annoying? + +## Migration from Resource-Level DeleteBeforeReplace + +If your provider currently uses resource-level `DeleteBeforeReplace: true`: + +### Before (Resource-Level) +```go +"aws_instance": { + Tok: "aws:ec2/instance:Instance", + DeleteBeforeReplace: true, // Applied to ALL field changes +}, +``` + +### After (Field-Level) +```go +"aws_instance": { + Tok: "aws:ec2/instance:Instance", + // Remove resource-level setting + Fields: map[string]*tfbridge.SchemaInfo{ + "instance_type": { + ForceNew: &[]bool{true}[0], + WarnDeleteBeforeReplace: &tfbridge.FieldDeleteBeforeReplaceWarning{ + Enabled: true, + Reason: "Instance type changes may conflict with attached resources", + }, + }, + // Only add warnings for fields that actually need them + }, +}, +``` + +### Migration Steps + +1. **Identify problematic fields** - Which specific fields cause replacement conflicts? +2. **Remove resource-level setting** - Stop applying delete-before-replace to all changes +3. **Add field-level warnings** - Configure warnings only for problematic fields +4. **Test thoroughly** - Ensure no regression in user experience +5. **Update documentation** - Reflect the more granular approach + +## Common Patterns by Resource Type + +### Compute Instances +- **Instance size/type changes**: Almost always need warnings +- **Network interface changes**: Usually need warnings +- **Storage configuration**: Often needs warnings +- **Simple metadata changes**: Usually don't need warnings + +### Networking Resources +- **Subnet changes**: Often need warnings due to IP conflicts +- **Security group changes**: Sometimes need warnings +- **Route table changes**: Rarely need warnings + +### Storage Resources +- **Disk size changes**: Usually don't need warnings (often in-place) +- **Disk type changes**: Often need warnings +- **Encryption changes**: Usually need warnings + +## Troubleshooting + +### Warning Not Appearing + +Check that: +1. Field is configured with `WarnDeleteBeforeReplace.Enabled: true` +2. Field value is actually changing between old and new state +3. Field has `ForceNew: true` (warnings only make sense for replacement-triggering fields) + +### Too Many Warnings + +If users complain about too many warnings: +1. Review which fields actually cause conflicts in practice +2. Remove warnings for fields that rarely cause issues +3. Make warning messages more specific and actionable + +### Warnings for Wrong Scenarios + +If warnings appear when they shouldn't: +1. Check field change detection logic +2. Verify the field actually triggers replacement +3. Consider if the warning reason is accurate + +This field-level warning system helps provider developers give their users more targeted guidance about when delete-before-replace is beneficial, improving the overall user experience with complex resource replacements. \ No newline at end of file diff --git a/field_level_delete_before_replace_plan.md b/field_level_delete_before_replace_plan.md new file mode 100644 index 000000000..1cd274e71 --- /dev/null +++ b/field_level_delete_before_replace_plan.md @@ -0,0 +1,568 @@ +# Field-Level Conditional DeleteBeforeReplace Implementation Plan + +## Executive Summary + +This document outlines the implementation plan for adding field-level conditional `DeleteBeforeReplace` functionality to the Pulumi Terraform Bridge. The solution uses a predicate-based system to enable dynamic decision-making about when resource replacement should use delete-before-replace vs create-before-replace strategies. + +**Key Innovation**: Instead of static boolean values, this implements a "truthiness function" approach using structured predicates that can evaluate resource state at runtime to make intelligent replacement decisions. + +## Problem Statement + +### Core Issue +Several bridged Terraform providers have fields that trigger resource replacement, but the replacement fails unless the resource is deleted before creating the replacement. Currently, `DeleteBeforeReplace` only works at the resource level, creating an all-or-nothing approach. + +### Specific Examples + +1. **Google Private EKS Clusters**: Field `enable_private_nodes` triggers replacement, but fails if attached network interfaces conflict between old and new resources +2. **Linode Instances**: Changes to `root_pass` on certain instance types require delete-before-replace to avoid configuration conflicts +3. **General Pattern**: The need for delete-before-replace is often conditional based on resource state, not just field changes + +### Current Limitations +- Resource-level `DeleteBeforeReplace` is too broad (affects all field changes) +- No way to specify conditional logic (e.g., "only if attached resources exist") +- Users must manually configure `deleteBeforeReplace: true` with limited guidance + +## Solution Architecture + +### High-Level Approach + +1. **Predicate-Based System**: Use structured predicates instead of boolean flags +2. **Runtime Evaluation**: Evaluate conditions during diff computation using actual resource state +3. **Warning-First Strategy**: Start with warnings to users, then add experimental automatic behavior +4. **Extensible Framework**: Allow new predicates to be added without breaking changes + +### Core Components + +#### 1. Schema Enhancement + +```go +// Location: pkg/tfbridge/info/info.go +type Schema struct { + // ... existing fields + ForceNew *bool + DeleteBeforeReplace *bool // Simple boolean case (unchanged) + DeleteBeforeReplaceFunc *DeleteBeforeReplaceEvaluator // NEW: Predicate-based evaluation +} + +type DeleteBeforeReplaceEvaluator struct { + // Structured predicates for transparency and debuggability + Predicates []DeleteBeforeReplacePredicate + Logic PredicateLogic // How to combine predicates (AND, OR, NOT) + + // Behavior control + Behavior DeleteBeforeReplaceBehavior // Warn vs Auto-apply + Reasoning string // Human-readable explanation +} + +type DeleteBeforeReplacePredicate struct { + Type PredicateType + Config map[string]interface{} + Description string // Human-readable description for debugging +} + +type PredicateLogic struct { + Op string // "AND", "OR", "NOT" + Expr string // Complex expressions like "(A AND B) OR C" +} + +type DeleteBeforeReplaceBehavior int +const ( + WarnUser DeleteBeforeReplaceBehavior = iota // Default: warn user + AutoApply // Experimental: auto-apply +) +``` + +#### 2. Predicate Types + +```go +type PredicateType string + +// Core predicates for initial implementation +const ( + HasAttachedResources PredicateType = "hasAttachedResources" // Check for dependent resources + FieldChanged PredicateType = "fieldChanged" // Specific field has changed + FieldEquals PredicateType = "fieldEquals" // Field equals specific value + FieldMatches PredicateType = "fieldMatches" // Field matches regex/pattern + ResourceCount PredicateType = "resourceCount" // Count of related resources + StateCheck PredicateType = "stateCheck" // Custom state validation +) +``` + +#### 3. Runtime Evaluation Engine + +```go +// Location: pkg/tfbridge/predicate_evaluator.go (NEW FILE) +type PredicateEvaluator struct { + // Core evaluation logic +} + +func (pe *PredicateEvaluator) Evaluate( + predicates []DeleteBeforeReplacePredicate, + logic PredicateLogic, + old, new resource.PropertyMap, + meta ResourceMetadata, +) (*EvaluationResult, error) { + // Evaluate each predicate and combine using logic +} + +type ResourceMetadata struct { + ProviderName string + ResourceType string + AttachedResources []AttachedResourceInfo + // Additional context as needed +} + +type EvaluationResult struct { + ShouldDeleteBeforeReplace bool + Reasoning []string // Step-by-step reasoning + MatchedPredicates []string // Which predicates matched +} +``` + +## Implementation Plan + +### Phase 1: Foundation Infrastructure (3 weeks) + +#### Week 1: Core Schema and Predicate Framework +**Deliverables:** +- Add `DeleteBeforeReplaceEvaluator` to `pkg/tfbridge/info/info.go` +- Implement predicate type definitions +- Create `PredicateEvaluator` with basic evaluation logic +- Unit tests for predicate evaluation + +**Files to Modify:** +- `pkg/tfbridge/info/info.go` - Add new schema structs +- `pkg/tfbridge/predicate_evaluator.go` - NEW: Core evaluation engine +- `pkg/tfbridge/predicate_evaluator_test.go` - NEW: Unit tests + +**Risk Level:** Low - Pure additive changes, no behavior modification + +#### Week 2: Core Predicate Implementations +**Deliverables:** +- Implement `HasAttachedResources` predicate +- Implement `FieldChanged` predicate +- Implement `FieldEquals` predicate +- Comprehensive test suite for each predicate + +**Files to Modify:** +- `pkg/tfbridge/predicates/` - NEW DIRECTORY: Individual predicate implementations +- `pkg/tfbridge/predicates/attached_resources.go` - NEW +- `pkg/tfbridge/predicates/field_operations.go` - NEW +- Corresponding test files + +**Risk Level:** Low - Self-contained predicate logic + +#### Week 3: Integration with Diff Logic +**Deliverables:** +- Integrate predicate evaluation into existing diff computation +- Add warning infrastructure for recommendations +- Preserve all existing behavior (no automatic application yet) + +**Files to Modify:** +- `pkg/tfbridge/detailed_diff.go` - Add predicate evaluation +- `pkg/tfbridge/provider.go` - Integrate warnings into diff response +- `pkg/tfbridge/property_path.go` - Add evaluation utilities + +**Risk Level:** Medium - Touches core diff logic, but only adds warnings + +### Phase 2: Provider Integration & Testing (2 weeks) + +#### Week 4: GCP Provider Integration +**Deliverables:** +- Implement GCP-specific predicate configurations +- Add `HasAttachedResources` detection for compute instances +- Comprehensive testing with real GCP scenarios + +**Example Configuration:** +```go +// In GCP provider code +"enable_private_nodes": { + DeleteBeforeReplaceFunc: &DeleteBeforeReplaceEvaluator{ + Predicates: []DeleteBeforeReplacePredicate{ + { + Type: HasAttachedResources, + Config: map[string]interface{}{ + "resourceTypes": []string{"networkInterface", "attachedDisk"}, + "checkConflicts": true, + }, + Description: "Check for network interfaces that cannot be shared", + }, + { + Type: FieldChanged, + Config: map[string]interface{}{ + "field": "enable_private_nodes", + }, + Description: "Private nodes setting is changing", + }, + }, + Logic: PredicateLogic{Op: "AND"}, + Behavior: WarnUser, + Reasoning: "Private node changes may conflict with attached network interfaces", + }, +} +``` + +**Files to Modify:** +- Provider-specific configuration files +- Integration tests with real GCP resources + +**Risk Level:** Medium - Provider-specific changes, extensive testing required + +#### Week 5: Linode Provider Integration & Cross-Framework Support +**Deliverables:** +- Implement Linode-specific configurations +- Ensure Plugin Framework support mirrors SDKv2 +- Cross-provider testing and validation + +**Example Configuration:** +```go +// In Linode provider code +"root_pass": { + DeleteBeforeReplaceFunc: &DeleteBeforeReplaceEvaluator{ + Predicates: []DeleteBeforeReplacePredicate{ + { + Type: FieldEquals, + Config: map[string]interface{}{ + "field": "type", + "values": []string{"g6-standard-1", "g6-standard-2"}, + }, + Description: "Instance type requires special handling", + }, + }, + Logic: PredicateLogic{Op: "OR"}, + Behavior: WarnUser, + Reasoning: "Root password changes on these instance types require delete-before-replace", + }, +} +``` + +**Files to Modify:** +- `pkg/pf/` - Plugin Framework equivalent implementations +- Provider-specific configurations +- Cross-framework test suite + +**Risk Level:** Medium - Multi-framework coordination required + +### Phase 3: Experimental Automatic Behavior (2 weeks) + +#### Week 6: Feature Flag Implementation +**Deliverables:** +- Add `PULUMI_EXPERIMENTAL_AUTO_DELETE_BEFORE_REPLACE` feature flag +- Implement automatic application of delete-before-replace when flag is enabled +- Extensive logging and user feedback mechanisms + +**Files to Modify:** +- `pkg/tfbridge/provider.go` - Add feature flag detection and automatic behavior +- Environment variable handling +- Logging infrastructure + +**Risk Level:** High - Changes actual replacement behavior, requires extensive testing + +#### Week 7: User Experience & Debugging Tools +**Deliverables:** +- Enhanced warning messages with clear reasoning +- Debugging tools for predicate evaluation +- Documentation for troubleshooting + +**Example Warning Output:** +``` +Warning: Field 'enablePrivateNodes' may require deleteBeforeReplace +├─ Reason: Change detected in private nodes configuration +├─ Condition: Instance has attached network interfaces that cannot be shared +├─ Recommendation: Set 'deleteBeforeReplace: true' on this resource +├─ Alternative: Remove conflicting attachments before update +└─ Learn more: https://pulumi.com/docs/delete-before-replace-conditions + +To automatically apply this recommendation, set: +PULUMI_EXPERIMENTAL_AUTO_DELETE_BEFORE_REPLACE=true +``` + +**Files to Modify:** +- Warning and error message systems +- Documentation generation +- User-facing help text + +**Risk Level:** Low - UX improvements only + +### Phase 4: Documentation & Validation (2 weeks) + +#### Week 8: Documentation Generation Enhancement +**Deliverables:** +- Enhance tfgen to document conditional delete-before-replace behavior +- Add field-level documentation annotations +- Update provider documentation with examples + +**Files to Modify:** +- `pkg/tfgen/generate.go` - Add conditional documentation +- `pkg/tfgen/docs.go` - Enhanced doc processing +- Documentation templates + +**Risk Level:** Low - Documentation improvements only + +#### Week 9: Comprehensive Testing & Validation +**Deliverables:** +- End-to-end integration tests +- Cross-provider compatibility testing +- Performance impact assessment +- Regression testing suite + +**Test Categories:** +1. **Unit Tests**: Individual predicate logic +2. **Integration Tests**: Full diff computation with predicates +3. **Cross-Tests**: Compare Terraform vs Pulumi behavior +4. **Provider Tests**: Real-world scenarios with GCP/Linode +5. **Performance Tests**: Impact on diff computation time +6. **Regression Tests**: Ensure existing providers unaffected + +**Risk Level:** Critical - Final validation before release + +## Detailed Technical Specifications + +### Predicate Implementation Details + +#### HasAttachedResources Predicate +```go +type HasAttachedResourcesConfig struct { + ResourceTypes []string // Types to check: "networkInterface", "disk", etc. + CheckConflicts bool // Whether to verify potential conflicts + MinimumCount int // Minimum number that triggers condition + MaximumCount int // Maximum number that triggers condition +} + +func (p *HasAttachedResourcesPredicate) Evaluate( + config HasAttachedResourcesConfig, + old, new resource.PropertyMap, + meta ResourceMetadata, +) bool { + // Implementation: + // 1. Query attached resources from metadata + // 2. Filter by specified types + // 3. Check count against min/max thresholds + // 4. Optionally verify conflict potential +} +``` + +#### FieldChanged Predicate +```go +type FieldChangedConfig struct { + Field string // Field path (supports nested: "config.network.private") + IgnoreOrder bool // For arrays/lists, ignore order changes + Threshold interface{} // Minimum change threshold for numeric fields +} + +func (p *FieldChangedPredicate) Evaluate( + config FieldChangedConfig, + old, new resource.PropertyMap, +) bool { + // Implementation: + // 1. Extract field values from old and new maps + // 2. Compare values with appropriate logic + // 3. Apply threshold/ignore rules as configured +} +``` + +### Runtime Integration + +#### Diff Logic Integration +```go +// Location: pkg/tfbridge/detailed_diff.go +func (p *Provider) buildDetailedDiff( + olds, news resource.PropertyMap, + res *ResourceInfo, +) (*DiffResult, error) { + // ... existing diff logic ... + + // NEW: Evaluate delete-before-replace predicates + deleteBeforeReplaceRecommendation, err := p.evaluateDeleteBeforeReplaceConditions( + olds, news, res, + ) + if err != nil { + return nil, err + } + + // Add warnings to diff result + if deleteBeforeReplaceRecommendation.ShouldWarn { + result.Warnings = append(result.Warnings, deleteBeforeReplaceRecommendation.Warning) + } + + // Apply automatic behavior if experimental flag is enabled + if deleteBeforeReplaceRecommendation.ShouldAutoApply && isExperimentalFlagEnabled() { + result.DeleteBeforeReplace = true + result.Reasoning = append(result.Reasoning, deleteBeforeReplaceRecommendation.Reasoning...) + } + + return result, nil +} +``` + +#### Warning System Enhancement +```go +type DeleteBeforeReplaceWarning struct { + Field string + Reason string + Conditions []string + Suggestions []string + LearnMoreURL string +} + +func (w *DeleteBeforeReplaceWarning) Format() string { + // Generate user-friendly warning message with tree structure +} +``` + +## Testing Strategy + +### Test Coverage Requirements + +1. **Unit Test Coverage**: >90% for all new predicate and evaluation code +2. **Integration Test Coverage**: >80% for diff logic integration +3. **Cross-Provider Tests**: Verify consistent behavior across GCP, Linode, and other providers +4. **Performance Tests**: Ensure <5% impact on diff computation time +5. **Regression Tests**: Zero regression in existing provider behavior + +### Test Scenarios + +#### Unit Test Scenarios +- Individual predicate evaluation with various inputs +- Logic combination (AND, OR, NOT) with multiple predicates +- Edge cases: empty fields, null values, complex nested structures +- Error handling: invalid configurations, missing metadata + +#### Integration Test Scenarios +- Real resource diffs with predicate evaluation +- Warning generation and formatting +- Experimental flag behavior +- Cross-framework consistency (SDKv2 vs Plugin Framework) + +#### Provider-Specific Test Scenarios +- **GCP**: Compute instances with various attachment configurations +- **Linode**: Different instance types and configuration changes +- **Generic**: Common patterns applicable to multiple providers + +### Performance Benchmarks + +Target performance impact: +- Predicate evaluation: <1ms per field +- Overall diff computation: <5% increase +- Memory usage: <10% increase during diff computation + +## Risk Assessment & Mitigation + +### High-Risk Areas + +1. **Core Diff Logic Changes** + - **Risk**: Breaking existing provider behavior + - **Mitigation**: Extensive regression testing, phased rollout + - **Rollback Plan**: Feature flag to disable predicate evaluation + +2. **Cross-Framework Consistency** + - **Risk**: Different behavior between SDKv2 and Plugin Framework + - **Mitigation**: Shared evaluation engine, comprehensive cross-tests + - **Monitoring**: Automated tests comparing both frameworks + +3. **Performance Impact** + - **Risk**: Slowing down diff computation + - **Mitigation**: Performance benchmarks, lazy evaluation, caching + - **Monitoring**: Continuous performance regression testing + +### Medium-Risk Areas + +1. **Predicate Configuration Complexity** + - **Risk**: Users struggling with configuration + - **Mitigation**: Clear documentation, validation, helpful error messages + - **Support**: Examples and troubleshooting guides + +2. **Provider-Specific Logic** + - **Risk**: Incorrect cloud provider assumptions + - **Mitigation**: Collaboration with provider teams, extensive testing + - **Validation**: Real-world scenario testing + +### Low-Risk Areas + +1. **Documentation Generation** + - **Risk**: Incorrect or unclear documentation + - **Mitigation**: Review process, user testing + - **Impact**: Documentation-only, easily fixable + +2. **Warning Messages** + - **Risk**: Confusing or unhelpful warnings + - **Mitigation**: User experience testing, iterative improvement + - **Impact**: UX only, no functional impact + +## Success Metrics + +### Phase 1 Success Criteria +- [ ] Predicate evaluation framework implemented and tested +- [ ] Zero regression in existing provider functionality +- [ ] Unit test coverage >90% for new code +- [ ] Performance impact <2% on diff computation + +### Phase 2 Success Criteria +- [ ] GCP and Linode provider integrations working correctly +- [ ] Warning system generating helpful recommendations +- [ ] User feedback indicates warnings are accurate >80% of the time +- [ ] Cross-framework behavior is consistent + +### Phase 3 Success Criteria +- [ ] Experimental automatic behavior functions correctly +- [ ] Feature flag system robust and safe +- [ ] Extensive logging provides clear audit trail +- [ ] No unintended automatic applications + +### Phase 4 Success Criteria +- [ ] Documentation clearly explains new functionality +- [ ] User adoption of conditional delete-before-replace increases +- [ ] Support tickets related to replacement failures decrease +- [ ] Community feedback is positive + +### Long-term Success Metrics (6+ months) +- [ ] Reduced replacement-related failures in production +- [ ] Increased user satisfaction with replacement behavior +- [ ] Successful adoption by additional providers beyond GCP/Linode +- [ ] Feature graduates from experimental to stable + +## Migration Path & Backwards Compatibility + +### Backwards Compatibility Guarantees +1. **Existing Configurations**: All current `DeleteBeforeReplace: true` configurations continue working unchanged +2. **No Breaking Changes**: New functionality is purely additive +3. **Opt-in Behavior**: Predicate-based evaluation only occurs when explicitly configured +4. **Default Behavior**: Unchanged for resources not using new predicates + +### Migration Strategy +1. **Phase 1**: New functionality available but not automatically applied +2. **Phase 2**: Providers can opt-in to predicate-based configurations +3. **Phase 3**: Community feedback and refinement +4. **Phase 4**: Stable release with full documentation + +### Deprecation Path (Future) +- No immediate deprecation planned +- Boolean `DeleteBeforeReplace` remains supported indefinitely +- Future versions may encourage migration to predicate-based approach + +## Resource Requirements + +### Development Team +- **Lead Developer**: 1 FTE for full 9-week duration +- **Provider Specialists**: 0.5 FTE each for GCP and Linode integration (weeks 4-5) +- **Testing Engineer**: 0.5 FTE for comprehensive testing (weeks 6-9) +- **Technical Writer**: 0.25 FTE for documentation (weeks 8-9) + +### Infrastructure Requirements +- Enhanced CI/CD pipeline for cross-provider testing +- Performance monitoring infrastructure +- Extended test environments for multiple providers + +### Review & Approval Process +- Architecture review at end of Phase 1 +- Security review before Phase 3 (experimental behavior) +- Provider team approval for each provider integration +- Documentation review before final release + +## Conclusion + +This implementation plan provides a comprehensive approach to adding field-level conditional `DeleteBeforeReplace` functionality to the Pulumi Terraform Bridge. The predicate-based system offers the flexibility of "truthiness functions" while maintaining transparency, debuggability, and cross-language compatibility. + +The phased approach minimizes risk by starting with warnings and progressing to automatic behavior only after validation. The extensive testing strategy ensures compatibility and performance, while the migration path preserves backwards compatibility. + +This feature will significantly improve the user experience for complex resource replacement scenarios while providing a foundation for future enhancements to the bridge's intelligence and automation capabilities. \ No newline at end of file diff --git a/implementation_guide.md b/implementation_guide.md new file mode 100644 index 000000000..9444b04e1 --- /dev/null +++ b/implementation_guide.md @@ -0,0 +1,263 @@ +# Provider Inconsistency Detection Implementation Guide (Revised) + +## Overview + +This implementation guide outlines how to add a feature to detect and report when upstream Terraform providers produce inconsistent results after apply operations. This addresses issue #2413 where Terraform providers with bugs that trigger an "inconsistent result of apply" error don't properly surface this information to Pulumi users. + +The implementation will: +1. Add comparison logic between planned state and actual state after create/update operations +2. Log detected inconsistencies as warnings to help users troubleshoot +3. Make this feature configurable with environment variables to ensure backward compatibility +4. Filter out known/expected inconsistencies to reduce noise + +## Background + +Terraform and OpenTofu detect inconsistencies between the planned state and actual state after an apply operation. When detected, they display an error message like: + +``` +Error: Provider produced inconsistent result after apply + +When applying changes to resource_type.resource_name, provider +produced an unexpected new value: +.some_property: was cty.ListValEmpty(cty.String), but now null. + +This is a bug in the provider, which should be reported in the provider's own issue +tracker. +``` + +Currently, Pulumi's Terraform bridge doesn't perform this detection, so users just experience inexplicable diffs on subsequent operations when this issue occurs. + +## OpenTofu Implementation Analysis + +OpenTofu implements inconsistency detection by comparing the planned state against the final state returned after an apply operation. Key aspects of their implementation: + +1. The detection happens in the apply phase, after a resource has been created or updated +2. The comparison is done attribute-by-attribute, looking for differences between planned and actual values +3. The error message includes specific paths to inconsistent values and shows both the expected and actual values +4. The issue is reported as an error but doesn't prevent the apply operation from completing +5. The message clearly indicates this is a provider bug, not a user configuration error + +OpenTofu's implementation handles special cases like: +- Comparing empty collections vs. null values (a common source of inconsistencies) +- Type differences (e.g., string representation vs. actual type) +- Nested attributes in complex data structures + +## Implementation Lessons Learned + +### Key Design Considerations + +1. **Post-Apply Analysis Only**: The inconsistency detection should happen only after the resource has been successfully created or updated. It should not modify any core behavior or affect the creation/update process itself. + +2. **No Side Effects**: The detection code must be purely analytical. It should not modify any state, interfere with naming, auto-aliasing, or other bridge features. + +3. **Warning Only**: Detected inconsistencies should be logged as warnings only, not errors. This is a change from OpenTofu's approach to avoid disrupting Pulumi workflows while still providing visibility. + +4. **Separate from Core Logic**: Keep the feature logic isolated from core bridge functionality to avoid introducing bugs or regressions. + +5. **Minimal Dependencies**: The detection code should have minimal dependencies on other bridge components to reduce the risk of unexpected interactions. + +### Implementation Pitfalls to Avoid + +1. **❌ DO NOT modify the resource state**: The implementation should never modify the state being returned by the provider, even if inconsistencies are detected. This would lead to unexpected behavior. + +2. **❌ DO NOT alter the resource ID**: Never manipulate the resource ID or other crucial identifiers during inconsistency detection. This will break resources. + +3. **❌ DO NOT interfere with autonaming or renaming**: The detection happens after these processes and should not attempt to modify them. + +4. **❌ DO NOT use different output structures**: Keep the interface to inconsistency detection simple and focused. + +5. **❌ DO NOT add excessive filtering too early**: Start with minimal filtering and add more based on real-world usage patterns. + +### Recommended Implementation Approach + +1. **Create New Files**: + - `pkg/tfbridge/inconsistency_detector.go`: Core detection logic for SDKv2 providers + - `pkg/pf/tfbridge/inconsistency_detector.go`: Plugin Framework specific implementation + - `pkg/tfbridge/inconsistency_filter.go`: Filtering logic for known false positives + +2. **Integration Points**: + - Add detection calls after successful creation in the `Create` method + - Add detection calls after successful updates in the `Update` method + - Do not modify any other parts of the code flow + +3. **Keep Analysis Simple**: + - Compare planned state with actual state + - Filter out known false positives + - Log inconsistencies as warnings + +## Step-by-Step Implementation + +1. **Define Configuration System**: + ```go + // In pkg/tfbridge/inconsistency_config.go + const ( + EnvDetectInconsistentApply = "PULUMI_DETECT_INCONSISTENT_APPLY" + EnvDetectInconsistentApplyDetail = "PULUMI_DETECT_INCONSISTENT_APPLY_DETAIL" + EnvDetectInconsistentApplyResources = "PULUMI_DETECT_INCONSISTENT_APPLY_RESOURCES" + ) + + type InconsistencyConfig struct { + Enabled bool + DetailLevel string + ResourceTypes map[string]bool + } + + func GetInconsistencyConfig() InconsistencyConfig { + // Parse environment variables + } + + func ShouldDetectInconsistentApply(resourceType string) bool { + // Check if enabled for this resource + } + ``` + +2. **Create Filter Interface**: + ```go + // In pkg/tfbridge/inconsistency_filter.go + type InconsistencyFilter interface { + ShouldIgnoreAttribute(resourceType, attrName string) bool + ShouldIgnoreValueChange(resourceType, attrName string, plannedVal, actualVal interface{}) bool + } + + func NewDefaultInconsistencyFilter() InconsistencyFilter { + // Create default implementation with common filters + } + ``` + +3. **Implement Detection Logic**: + ```go + // In pkg/tfbridge/inconsistency_detector.go + type InconsistencyDetail struct { + Path string + PlannedValue interface{} + ActualValue interface{} + Description string + } + + func detectInconsistentApply( + ctx context.Context, + resourceType string, + plannedState shim.InstanceState, + actualState shim.InstanceState, + filter InconsistencyFilter, + ) []InconsistencyDetail { + // Return detected inconsistencies + } + + func formatInconsistencyMessage(resourceType string, details []InconsistencyDetail) string { + // Format human-readable message + } + ``` + +4. **Add To Provider Create/Update Methods**: + ```go + // At the end of the Create/Update method, after a successful apply + // but before returning the results + if !preview && ShouldDetectInconsistentApply(resourceType) { + inconsistencies := detectInconsistentApply( + ctx, resourceType, plannedState, actualState, filter) + + if len(inconsistencies) > 0 { + message := formatInconsistencyMessage(resourceType, inconsistencies) + logger := getLogger(ctx) + logger.Warn(message) + } + } + ``` + +5. **Add Comprehensive Tests**: + ```go + // In pkg/tfbridge/inconsistency_detector_test.go + func TestDetectInconsistentApply(t *testing.T) { + // Test various inconsistency scenarios + } + + func TestInconsistencyFiltering(t *testing.T) { + // Test filtering behavior + } + ``` + +## Common Inconsistency Patterns + +Based on analysis of OpenTofu issues and provider bugs, these patterns should be specifically addressed: + +1. **Null vs. Empty Collections**: Many providers inconsistently return `null` instead of empty arrays/maps or vice versa + ``` + .tags: was "[]", but now null + ``` + +2. **Type Inconsistencies**: Particularly between string representations and actual types + ``` + .count: was "5", but now 5 + ``` + +3. **Computed Attributes**: Providers sometimes unexpectedly add computed attributes + ``` + .computed_attribute: was absent, but now "auto-generated-value" + ``` + +4. **ID Format Changes**: Changes in formatting for resource IDs or ARNs + ``` + .id: was "my-resource", but now "my_resource" + ``` + +5. **Eventual Consistency Issues**: Resources that haven't fully propagated may return inconsistent results + +## Documentation For Users + +```markdown +## Detecting Provider Inconsistencies + +To help identify issues with Terraform providers, Pulumi can detect and report inconsistencies +between the planned state and actual state after resource operations. + +### Environment Variables + +- `PULUMI_DETECT_INCONSISTENT_APPLY`: Set to `true` to enable detection +- `PULUMI_DETECT_INCONSISTENT_APPLY_DETAIL`: Controls detail level (`normal`, `debug`, `trace`) +- `PULUMI_DETECT_INCONSISTENT_APPLY_RESOURCES`: Optional comma-separated resource types to check + +### Example Usage + +```bash +# Enable detection for all resources +export PULUMI_DETECT_INCONSISTENT_APPLY=true + +# Enable detailed logging for specific resources +export PULUMI_DETECT_INCONSISTENT_APPLY=true +export PULUMI_DETECT_INCONSISTENT_APPLY_DETAIL=debug +export PULUMI_DETECT_INCONSISTENT_APPLY_RESOURCES="aws_lambda_function,aws_iam_role" +``` + +When inconsistencies are detected, you'll see warning messages that can help +identify bugs in the provider. These may explain unexplained diffs you see on +subsequent operations. +``` + +## Test Plan + +1. **Unit Tests**: + - Test configuration parsing + - Test detection logic with various examples + - Test filtering system + - Test specifically against known inconsistency patterns + +2. **Integration Tests**: + - Create a test provider that deliberately returns inconsistent states + - Verify detection works with real provider interactions + +3. **Manual Testing**: + - Test with known problematic providers (AWS, Azure, GCP) + - Verify minimal performance impact + +## Performance Considerations + +- Enable logging only when the feature is activated +- Add early-return checks to avoid unnecessary processing +- Implement sampling for large collections to reduce overhead +- Add depth limits for recursive comparisons +- Use memory-efficient comparison algorithms for large state objects + +## Conclusion + +Adding inconsistency detection will help users understand and troubleshoot issues with Terraform providers. By implementing this carefully with proper isolation from core bridge functionality, we can provide this feature without affecting existing functionality or introducing regressions. Following OpenTofu's approach for detection but using warnings instead of errors will maintain compatibility while improving user experience. \ No newline at end of file diff --git a/implementation_status.md b/implementation_status.md new file mode 100644 index 000000000..5e076866a --- /dev/null +++ b/implementation_status.md @@ -0,0 +1,54 @@ +# Implementation Plan Status + +## Documents Updated for Architects + +All implementation plans have been updated to use the complete predicate-based system instead of simple warnings. + +### 1. Full Implementation Plan +**File:** `field_level_delete_before_replace_plan.md` +- **Status:** ✅ Complete predicate system (567 lines) +- **Scope:** Full 9-week implementation with all predicate types +- **Content:** Complete architecture, all predicate types, cross-framework support + +### 2. MVP Implementation Plan +**File:** `mvp_field_delete_before_replace.md` +- **Status:** ✅ Updated to use predicate framework +- **Scope:** 3-week MVP with just `fieldChanged` predicate +- **Content:** Simplified predicate system, GCP + Linode examples + +### 3. Developer Documentation +**File:** `docs/guides/field-level-delete-before-replace.md` +- **Status:** ✅ Complete predicate system with all types +- **Scope:** Provider developer guide +- **Content:** All predicate types, GCP/AWS/Linode examples, testing guidance + +## Key Changes Made + +### Architecture Consistency +- All documents now use the `DeleteBeforeReplaceEvaluator` struct +- Predicate-based configuration throughout +- Consistent type definitions across all plans + +### Enhanced Examples +- **GCP:** `enable_private_nodes` and `machine_type` with `fieldChanged` predicates +- **Linode:** `root_pass` with `fieldChanged` predicate +- **AWS:** Updated to use predicate system +- **Complete predicate types:** `fieldChanged`, `hasAttachedResources`, `fieldEquals`, `fieldMatches`, `resourceCount` + +### Developer-Focused Content +- Comprehensive predicate configuration examples +- Testing strategies for provider developers +- Migration guidance from simple boolean to predicate-based +- Best practices for warning message writing + +## Ready for Architecture Review + +The implementation plans now provide: + +1. **Consistent Architecture:** All documents use the same predicate-based system +2. **Scalable Foundation:** MVP uses simplified predicate framework that extends to full system +3. **Real Examples:** GCP and Linode examples matching the original GitHub issues +4. **Complete Testing:** Comprehensive test examples for provider developers +5. **Clear Migration Path:** From MVP → Full Implementation → Stable Release + +Both the MVP (3 weeks) and full implementation (9 weeks) are ready for architectural review and team discussion. \ No newline at end of file diff --git a/mvp_field_delete_before_replace.md b/mvp_field_delete_before_replace.md new file mode 100644 index 000000000..1bc903799 --- /dev/null +++ b/mvp_field_delete_before_replace.md @@ -0,0 +1,731 @@ +# MVP: Field-Level DeleteBeforeReplace Implementation + +## Overview + +This document outlines a Minimum Viable Product (MVP) implementation for field-level conditional `DeleteBeforeReplace` functionality. The MVP focuses on proving the core concept with minimal complexity and risk. + +## MVP Scope + +### What's Included +- **Single Predicate**: `FieldChanged` predicate only +- **Warning-Only Behavior**: No automatic replacement behavior changes +- **Multiple Provider Support**: GCP and Linode examples +- **Basic Integration**: Core diff logic integration with predicate framework +- **Predicate-Based Configuration**: Foundation for future expansion + +### What's Excluded (Future Phases) +- Multiple predicate types (`HasAttachedResources`, `FieldEquals`, etc.) +- Automatic delete-before-replace application (`AutoApply` behavior) +- Complex predicate logic combinations (AND/OR) +- Plugin Framework support (SDKv2 only for MVP) +- Advanced predicates requiring resource metadata + +## Technical Design + +### Schema Changes + +```go +// Location: pkg/tfbridge/info/info.go + +// Add to existing Schema struct +type Schema struct { + // ... existing fields ... + ForceNew *bool + DeleteBeforeReplace *bool // Simple boolean case (unchanged) + DeleteBeforeReplaceFunc *DeleteBeforeReplaceEvaluator // NEW: Predicate-based evaluation +} + +// MVP: Predicate-based configuration (simplified for MVP) +type DeleteBeforeReplaceEvaluator struct { + Predicates []DeleteBeforeReplacePredicate // List of conditions to evaluate + Logic PredicateLogic // How to combine predicates (MVP: only "AND") + Behavior DeleteBeforeReplaceBehavior // MVP: only WarnUser + Reasoning string // Human-readable explanation +} + +type DeleteBeforeReplacePredicate struct { + Type string // MVP: only "fieldChanged" + Config map[string]interface{} // Predicate-specific configuration + Description string // Human-readable description for debugging +} + +type PredicateLogic struct { + Op string // MVP: only "AND" +} + +type DeleteBeforeReplaceBehavior int +const ( + WarnUser DeleteBeforeReplaceBehavior = iota // MVP: only warn user + // AutoApply // Future: automatic application +) +``` + +### Core Implementation + +```go +// Location: pkg/tfbridge/predicate_evaluator.go (NEW FILE) + +package tfbridge + +import ( + "fmt" + "github.com/pulumi/pulumi/sdk/v3/go/common/resource" +) + +// MVP: Predicate evaluation engine (simplified) +type PredicateEvaluator struct{} + +func NewPredicateEvaluator() *PredicateEvaluator { + return &PredicateEvaluator{} +} + +// Evaluate runs the predicate evaluation for a field +func (pe *PredicateEvaluator) Evaluate( + evaluator *DeleteBeforeReplaceEvaluator, + old, new resource.PropertyMap, + meta ResourceMetadata, +) (*EvaluationResult, error) { + + if evaluator == nil { + return nil, nil + } + + // MVP: Only support single "fieldChanged" predicate + var matchedPredicates []string + var reasoning []string + + for _, predicate := range evaluator.Predicates { + if predicate.Type == "fieldChanged" { + matched, reason, err := pe.evaluateFieldChanged(predicate, old, new) + if err != nil { + return nil, err + } + if matched { + matchedPredicates = append(matchedPredicates, predicate.Description) + reasoning = append(reasoning, reason) + } + } + } + + // MVP: Simple AND logic - all predicates must match + shouldWarn := len(matchedPredicates) == len(evaluator.Predicates) && len(matchedPredicates) > 0 + + return &EvaluationResult{ + ShouldDeleteBeforeReplace: shouldWarn, + Reasoning: reasoning, + MatchedPredicates: matchedPredicates, + }, nil +} + +// MVP: Field changed predicate implementation +func (pe *PredicateEvaluator) evaluateFieldChanged( + predicate DeleteBeforeReplacePredicate, + old, new resource.PropertyMap, +) (bool, string, error) { + + fieldName, ok := predicate.Config["field"].(string) + if !ok { + return false, "", fmt.Errorf("fieldChanged predicate missing 'field' config") + } + + oldValue := old[resource.PropertyKey(fieldName)] + newValue := new[resource.PropertyKey(fieldName)] + + changed := fieldChanged(oldValue, newValue) + reason := fmt.Sprintf("Field '%s' changed from %v to %v", fieldName, oldValue, newValue) + + return changed, reason, nil +} + +// Helper function to detect field changes +func fieldChanged(old, new resource.PropertyValue) bool { + // Handle nil cases + if old.IsNull() && new.IsNull() { + return false + } + if old.IsNull() != new.IsNull() { + return true + } + + // Compare values + return !old.DeepEquals(new) +} + +// Supporting types +type ResourceMetadata struct { + ProviderName string + ResourceType string + // MVP: Minimal metadata, extended in future phases +} + +type EvaluationResult struct { + ShouldDeleteBeforeReplace bool + Reasoning []string // Step-by-step reasoning + MatchedPredicates []string // Which predicates matched +} + +// Warning structure +type DeleteBeforeReplaceWarning struct { + Field string + Reason string + Recommendation string +} + +func (w *DeleteBeforeReplaceWarning) String() string { + return fmt.Sprintf( + "Warning: Field '%s' may require deleteBeforeReplace\n"+ + "Reason: %s\n"+ + "Recommendation: %s", + w.Field, w.Reason, w.Recommendation, + ) +} +``` + +### Integration with Diff Logic + +```go +// Location: pkg/tfbridge/detailed_diff.go +// Add to existing buildDetailedDiff function + +func (p *Provider) buildDetailedDiff( + olds, news resource.PropertyMap, + tfs shim.ResourceMap, + ps *SchemaInfo, + res *ResourceInfo, +) (*pulumirpc.DiffResponse, error) { + + // ... existing diff logic ... + + // NEW: Check for field-level delete-before-replace warnings + warnings := p.checkFieldDeleteBeforeReplaceWarnings(olds, news, res) + + // Add warnings to response (implementation depends on existing warning system) + for _, warning := range warnings { + // Add to logging or diff response metadata + glog.Warningf("DeleteBeforeReplace recommendation: %s", warning.String()) + } + + // ... rest of existing logic ... + + return result, nil +} + +// NEW: Check all fields for delete-before-replace warnings +func (p *Provider) checkFieldDeleteBeforeReplaceWarnings( + olds, news resource.PropertyMap, + res *ResourceInfo, +) []*DeleteBeforeReplaceWarning { + + var warnings []*DeleteBeforeReplaceWarning + checker := NewFieldDeleteBeforeReplaceChecker() + + // Iterate through all fields that have warning configuration + for fieldName, schemaInfo := range res.Fields { + if schemaInfo.WarnDeleteBeforeReplace == nil { + continue + } + + oldValue := olds[resource.PropertyKey(fieldName)] + newValue := news[resource.PropertyKey(fieldName)] + + if warning := checker.CheckField(fieldName, oldValue, newValue, schemaInfo.WarnDeleteBeforeReplace); warning != nil { + warnings = append(warnings, warning) + } + } + + return warnings +} +``` + +## Example Usage + +### Provider Configuration + +#### GCP Provider Example + +```go +// Example: In a GCP provider configuration file + +package main + +import ( + "github.com/pulumi/pulumi-terraform-bridge/v3/pkg/tfbridge" + "github.com/pulumi/pulumi-terraform-bridge/v3/pkg/tfbridge/info" +) + +func configureGCPProvider() *tfbridge.ProviderInfo { + return &tfbridge.ProviderInfo{ + // ... existing configuration ... + + Resources: map[string]*tfbridge.ResourceInfo{ + "google_compute_instance": { + Tok: "gcp:compute/instance:Instance", + Fields: map[string]*tfbridge.SchemaInfo{ + "enable_private_nodes": { + // Existing ForceNew behavior (unchanged) + ForceNew: &[]bool{true}[0], + + // NEW: Predicate-based delete-before-replace warning + DeleteBeforeReplaceFunc: &tfbridge.DeleteBeforeReplaceEvaluator{ + Predicates: []tfbridge.DeleteBeforeReplacePredicate{ + { + Type: "fieldChanged", + Config: map[string]interface{}{ + "field": "enable_private_nodes", + }, + Description: "Private nodes setting is changing", + }, + }, + Logic: tfbridge.PredicateLogic{Op: "AND"}, + Behavior: tfbridge.WarnUser, + Reasoning: "Private node configuration changes may conflict with attached network interfaces", + }, + }, + "machine_type": { + ForceNew: &[]bool{true}[0], + DeleteBeforeReplaceFunc: &tfbridge.DeleteBeforeReplaceEvaluator{ + Predicates: []tfbridge.DeleteBeforeReplacePredicate{ + { + Type: "fieldChanged", + Config: map[string]interface{}{ + "field": "machine_type", + }, + Description: "Machine type is changing", + }, + }, + Logic: tfbridge.PredicateLogic{Op: "AND"}, + Behavior: tfbridge.WarnUser, + Reasoning: "Machine type changes require instance replacement and may conflict with persistent resources", + }, + }, + }, + }, + }, + } +} +``` + +#### Linode Provider Example + +```go +// Example: In a Linode provider configuration file + +func configureLinodeProvider() *tfbridge.ProviderInfo { + return &tfbridge.ProviderInfo{ + // ... existing configuration ... + + Resources: map[string]*tfbridge.ResourceInfo{ + "linode_instance": { + Tok: "linode:index/instance:Instance", + Fields: map[string]*tfbridge.SchemaInfo{ + "root_pass": { + ForceNew: &[]bool{true}[0], + DeleteBeforeReplaceFunc: &tfbridge.DeleteBeforeReplaceEvaluator{ + Predicates: []tfbridge.DeleteBeforeReplacePredicate{ + { + Type: "fieldChanged", + Config: map[string]interface{}{ + "field": "root_pass", + }, + Description: "Root password is changing", + }, + }, + Logic: tfbridge.PredicateLogic{Op: "AND"}, + Behavior: tfbridge.WarnUser, + Reasoning: "Root password changes may require delete-before-replace to avoid configuration conflicts", + }, + }, + }, + }, + }, + } +} +``` + +### Expected User Experience + +When a user changes the `enable_private_nodes` field on a GCP compute instance: + +```bash +$ pulumi preview + +Previewing update (dev): + Type Name Plan + pulumi:pulumi:Stack my-stack-dev + ~ └─ gcp:compute:Instance my-instance update + +Resources: + ~ 1 to update + 1 unchanged + +Warning: Field 'enable_private_nodes' may require deleteBeforeReplace +Reason: Private node configuration changes may conflict with attached network interfaces +Recommendation: Consider setting 'deleteBeforeReplace: true' on this resource + +Do you want to perform this update? +``` + +## Test Implementation + +### Unit Tests + +```go +// Location: pkg/tfbridge/field_delete_before_replace_test.go (NEW FILE) + +package tfbridge + +import ( + "testing" + + "github.com/pulumi/pulumi/sdk/v3/go/common/resource" + "github.com/stretchr/testify/assert" +) + +func TestFieldDeleteBeforeReplaceChecker_NoConfigReturnsNil(t *testing.T) { + checker := NewFieldDeleteBeforeReplaceChecker() + + oldValue := resource.NewStringProperty("old-value") + newValue := resource.NewStringProperty("new-value") + + warning := checker.CheckField("test_field", oldValue, newValue, nil) + + assert.Nil(t, warning) +} + +func TestFieldDeleteBeforeReplaceChecker_DisabledReturnsNil(t *testing.T) { + checker := NewFieldDeleteBeforeReplaceChecker() + config := &FieldDeleteBeforeReplaceWarning{ + Enabled: false, + Reason: "Test reason", + } + + oldValue := resource.NewStringProperty("old-value") + newValue := resource.NewStringProperty("new-value") + + warning := checker.CheckField("test_field", oldValue, newValue, config) + + assert.Nil(t, warning) +} + +func TestFieldDeleteBeforeReplaceChecker_NoChangeReturnsNil(t *testing.T) { + checker := NewFieldDeleteBeforeReplaceChecker() + config := &FieldDeleteBeforeReplaceWarning{ + Enabled: true, + Reason: "Test reason", + } + + sameValue := resource.NewStringProperty("same-value") + + warning := checker.CheckField("test_field", sameValue, sameValue, config) + + assert.Nil(t, warning) +} + +func TestFieldDeleteBeforeReplaceChecker_FieldChangedReturnsWarning(t *testing.T) { + checker := NewFieldDeleteBeforeReplaceChecker() + config := &FieldDeleteBeforeReplaceWarning{ + Enabled: true, + Reason: "Field change may cause conflicts", + } + + oldValue := resource.NewStringProperty("old-value") + newValue := resource.NewStringProperty("new-value") + + warning := checker.CheckField("test_field", oldValue, newValue, config) + + assert.NotNil(t, warning) + assert.Equal(t, "test_field", warning.Field) + assert.Equal(t, "Field change may cause conflicts", warning.Reason) + assert.Contains(t, warning.Recommendation, "deleteBeforeReplace") +} + +func TestFieldDeleteBeforeReplaceChecker_NullToValueReturnsWarning(t *testing.T) { + checker := NewFieldDeleteBeforeReplaceChecker() + config := &FieldDeleteBeforeReplaceWarning{ + Enabled: true, + Reason: "Test reason", + } + + oldValue := resource.NewNullProperty() + newValue := resource.NewStringProperty("new-value") + + warning := checker.CheckField("test_field", oldValue, newValue, config) + + assert.NotNil(t, warning) +} + +func TestFieldDeleteBeforeReplaceChecker_ValueToNullReturnsWarning(t *testing.T) { + checker := NewFieldDeleteBeforeReplaceChecker() + config := &FieldDeleteBeforeReplaceWarning{ + Enabled: true, + Reason: "Test reason", + } + + oldValue := resource.NewStringProperty("old-value") + newValue := resource.NewNullProperty() + + warning := checker.CheckField("test_field", oldValue, newValue, config) + + assert.NotNil(t, warning) +} + +func TestFieldDeleteBeforeReplaceChecker_BothNullReturnsNil(t *testing.T) { + checker := NewFieldDeleteBeforeReplaceChecker() + config := &FieldDeleteBeforeReplaceWarning{ + Enabled: true, + Reason: "Test reason", + } + + oldValue := resource.NewNullProperty() + newValue := resource.NewNullProperty() + + warning := checker.CheckField("test_field", oldValue, newValue, config) + + assert.Nil(t, warning) +} + +func TestFieldChanged_StringValues(t *testing.T) { + tests := []struct { + name string + old resource.PropertyValue + new resource.PropertyValue + expected bool + }{ + { + name: "same string values", + old: resource.NewStringProperty("test"), + new: resource.NewStringProperty("test"), + expected: false, + }, + { + name: "different string values", + old: resource.NewStringProperty("old"), + new: resource.NewStringProperty("new"), + expected: true, + }, + { + name: "empty to non-empty", + old: resource.NewStringProperty(""), + new: resource.NewStringProperty("value"), + expected: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := fieldChanged(tt.old, tt.new) + assert.Equal(t, tt.expected, result) + }) + } +} + +func TestFieldChanged_BooleanValues(t *testing.T) { + tests := []struct { + name string + old resource.PropertyValue + new resource.PropertyValue + expected bool + }{ + { + name: "same boolean values true", + old: resource.NewBoolProperty(true), + new: resource.NewBoolProperty(true), + expected: false, + }, + { + name: "same boolean values false", + old: resource.NewBoolProperty(false), + new: resource.NewBoolProperty(false), + expected: false, + }, + { + name: "different boolean values", + old: resource.NewBoolProperty(true), + new: resource.NewBoolProperty(false), + expected: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := fieldChanged(tt.old, tt.new) + assert.Equal(t, tt.expected, result) + }) + } +} +``` + +### Integration Test + +```go +// Location: pkg/tfbridge/detailed_diff_delete_before_replace_test.go (NEW FILE) + +package tfbridge + +import ( + "testing" + + "github.com/pulumi/pulumi/sdk/v3/go/common/resource" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestProvider_checkFieldDeleteBeforeReplaceWarnings_NoWarningsWhenNoConfig(t *testing.T) { + provider := &Provider{} + + olds := resource.PropertyMap{ + "field1": resource.NewStringProperty("old-value"), + } + news := resource.PropertyMap{ + "field1": resource.NewStringProperty("new-value"), + } + + res := &ResourceInfo{ + Fields: map[string]*SchemaInfo{ + "field1": { + // No WarnDeleteBeforeReplace configuration + }, + }, + } + + warnings := provider.checkFieldDeleteBeforeReplaceWarnings(olds, news, res) + + assert.Empty(t, warnings) +} + +func TestProvider_checkFieldDeleteBeforeReplaceWarnings_GeneratesWarningForConfiguredField(t *testing.T) { + provider := &Provider{} + + olds := resource.PropertyMap{ + "enable_private_nodes": resource.NewBoolProperty(false), + } + news := resource.PropertyMap{ + "enable_private_nodes": resource.NewBoolProperty(true), + } + + res := &ResourceInfo{ + Fields: map[string]*SchemaInfo{ + "enable_private_nodes": { + WarnDeleteBeforeReplace: &FieldDeleteBeforeReplaceWarning{ + Enabled: true, + Reason: "Private node changes may conflict with network interfaces", + }, + }, + }, + } + + warnings := provider.checkFieldDeleteBeforeReplaceWarnings(olds, news, res) + + require.Len(t, warnings, 1) + assert.Equal(t, "enable_private_nodes", warnings[0].Field) + assert.Equal(t, "Private node changes may conflict with network interfaces", warnings[0].Reason) + assert.Contains(t, warnings[0].Recommendation, "deleteBeforeReplace") +} + +func TestProvider_checkFieldDeleteBeforeReplaceWarnings_NoWarningWhenFieldUnchanged(t *testing.T) { + provider := &Provider{} + + sameValue := resource.NewBoolProperty(true) + olds := resource.PropertyMap{ + "enable_private_nodes": sameValue, + } + news := resource.PropertyMap{ + "enable_private_nodes": sameValue, + } + + res := &ResourceInfo{ + Fields: map[string]*SchemaInfo{ + "enable_private_nodes": { + WarnDeleteBeforeReplace: &FieldDeleteBeforeReplaceWarning{ + Enabled: true, + Reason: "Test reason", + }, + }, + }, + } + + warnings := provider.checkFieldDeleteBeforeReplaceWarnings(olds, news, res) + + assert.Empty(t, warnings) +} + +func TestProvider_checkFieldDeleteBeforeReplaceWarnings_MultipleFields(t *testing.T) { + provider := &Provider{} + + olds := resource.PropertyMap{ + "enable_private_nodes": resource.NewBoolProperty(false), + "machine_type": resource.NewStringProperty("n1-standard-1"), + "other_field": resource.NewStringProperty("unchanged"), + } + news := resource.PropertyMap{ + "enable_private_nodes": resource.NewBoolProperty(true), + "machine_type": resource.NewStringProperty("n1-standard-2"), + "other_field": resource.NewStringProperty("unchanged"), + } + + res := &ResourceInfo{ + Fields: map[string]*SchemaInfo{ + "enable_private_nodes": { + WarnDeleteBeforeReplace: &FieldDeleteBeforeReplaceWarning{ + Enabled: true, + Reason: "Private node changes may conflict", + }, + }, + "machine_type": { + WarnDeleteBeforeReplace: &FieldDeleteBeforeReplaceWarning{ + Enabled: true, + Reason: "Machine type changes require special handling", + }, + }, + "other_field": { + // No warning configuration + }, + }, + } + + warnings := provider.checkFieldDeleteBeforeReplaceWarnings(olds, news, res) + + require.Len(t, warnings, 2) + + // Should have warnings for both changed fields with configuration + fieldNames := []string{warnings[0].Field, warnings[1].Field} + assert.Contains(t, fieldNames, "enable_private_nodes") + assert.Contains(t, fieldNames, "machine_type") +} +``` + +## Implementation Steps + +### Phase 1: Foundation (Week 1) +1. Add `FieldDeleteBeforeReplaceWarning` struct to `pkg/tfbridge/info/info.go` +2. Create `pkg/tfbridge/field_delete_before_replace.go` with core logic +3. Add unit tests for field change detection and warning generation + +### Phase 2: Integration (Week 2) +1. Integrate warning check into `pkg/tfbridge/detailed_diff.go` +2. Add integration tests +3. Test with simple scenarios + +### Phase 3: Provider Configuration (Week 3) +1. Configure GCP provider with example field warnings +2. End-to-end testing with real GCP resources +3. Documentation and examples + +## Benefits of This MVP + +1. **Low Risk**: Only adds warnings, no behavior changes +2. **Validates Architecture**: Proves the field-level concept works +3. **Quick Implementation**: Simple scope, ~3 weeks +4. **Immediate Value**: Helps users understand when deleteBeforeReplace might be needed +5. **Foundation for Future**: Easy to extend with more predicates and automatic behavior + +## Success Criteria + +- [ ] Field-level warning configuration works +- [ ] Warnings appear when configured fields change +- [ ] No warnings when fields don't change +- [ ] Zero regression in existing provider behavior +- [ ] Clean integration with existing diff logic +- [ ] Comprehensive test coverage (>90%) + +This MVP provides a solid foundation for the full predicate-based system while delivering immediate value to users and validating the core architectural concepts. \ No newline at end of file