|
| 1 | +# patterns-error-handling |
| 2 | + |
| 3 | +Error wrapping, prefixes, and distinguishing retryable from fatal errors. |
| 4 | + |
| 5 | +--- |
| 6 | + |
| 7 | +## Error Prefix Convention |
| 8 | + |
| 9 | +All errors must include the connector name as a prefix: |
| 10 | + |
| 11 | +```go |
| 12 | +return fmt.Errorf("baton-myservice: failed to list users: %w", err) |
| 13 | +``` |
| 14 | + |
| 15 | +Pattern: `baton-{service}: {action}: %w` |
| 16 | + |
| 17 | +**Why:** When errors surface in logs or the UI, operators need to know which connector failed. |
| 18 | + |
| 19 | +--- |
| 20 | + |
| 21 | +## Error Wrapping with %w |
| 22 | + |
| 23 | +**Correct - preserves error chain:** |
| 24 | +```go |
| 25 | +if err != nil { |
| 26 | + return nil, fmt.Errorf("baton-myservice: failed to list users: %w", err) |
| 27 | +} |
| 28 | +``` |
| 29 | + |
| 30 | +**Wrong - breaks error chain:** |
| 31 | +```go |
| 32 | +if err != nil { |
| 33 | + return nil, fmt.Errorf("baton-myservice: failed to list users: %v", err) |
| 34 | +} |
| 35 | +``` |
| 36 | + |
| 37 | +**Why %w matters:** The SDK uses `errors.Is()` and `errors.As()` to detect specific error types such as rate limits. `%v` only formats the underlying error as text, so the error chain is lost and detection fails. |
| 38 | + |
| 39 | +--- |
| 40 | + |
| 41 | +## Retryable vs Fatal Errors |
| 42 | + |
| 43 | +| Error Type | Retryable? | Action | |
| 44 | +|------------|-----------|--------| |
| 45 | +| Rate limit (429) | Yes | SDK retries automatically | |
| 46 | +| Network timeout | Yes | SDK retries | |
| 47 | +| Server error (5xx) | Yes | SDK retries | |
| 48 | +| Bad request (400) | No | Log details, fail | |
| 49 | +| Unauthorized (401) | No | Check credentials | |
| 50 | +| Forbidden (403) | No | Check permissions | |
| 51 | +| Not found (404) | Depends | Often skip the resource instead of returning an error | |
| 52 | + |
| 53 | +--- |
| 54 | + |
| 55 | +## Error Detection Pattern |
| 56 | + |
| 57 | +```go |
| 58 | +func (u *userBuilder) List(ctx context.Context, parentID *v2.ResourceId, |
| 59 | + token *pagination.Token) ([]*v2.Resource, string, annotations.Annotations, error) { |
| 60 | + |
| 61 | + users, err := u.client.ListUsers(ctx) |
| 62 | + if err != nil { |
| 63 | + // Check for specific error types |
| 64 | + if isRateLimitError(err) { |
| 65 | + // SDK handles retry - just return the error |
| 66 | + return nil, "", nil, err |
| 67 | + } |
| 68 | + if isAuthError(err) { |
| 69 | + // Fatal - clear message for operator |
| 70 | + return nil, "", nil, fmt.Errorf("baton-myservice: authentication failed (check credentials): %w", err) |
| 71 | + } |
| 72 | + // Generic error |
| 73 | + return nil, "", nil, fmt.Errorf("baton-myservice: failed to list users: %w", err) |
| 74 | + } |
| 75 | + // ... |
| 76 | +} |
| 77 | + |
| 78 | +func isRateLimitError(err error) bool { |
| 79 | + var httpErr *HTTPError |
| 80 | + if errors.As(err, &httpErr) { |
| 81 | + return httpErr.StatusCode == 429 |
| 82 | + } |
| 83 | + return false |
| 84 | +} |
| 85 | +``` |
| 86 | + |
| 87 | +--- |
| 88 | + |
| 89 | +## Context Cancellation |
| 90 | + |
| 91 | +Always respect context cancellation: |
| 92 | + |
| 93 | +```go |
| 94 | +func (u *userBuilder) List(ctx context.Context, parentID *v2.ResourceId, |
| 95 | + token *pagination.Token) ([]*v2.Resource, string, annotations.Annotations, error) { |
| 96 | + |
| 97 | + users, err := u.client.ListUsers(ctx) |
| 98 | + if err != nil { |
| 99 | + return nil, "", nil, err |
| 100 | + } |
| 101 | + |
| 102 | + var resources []*v2.Resource |
| 103 | + for _, user := range users { |
| 104 | + // Check for cancellation in loops |
| 105 | + select { |
| 106 | + case <-ctx.Done(): |
| 107 | + return nil, "", nil, ctx.Err() |
| 108 | + default: |
| 109 | + } |
| 110 | + |
| 111 | + resource, err := createResource(user) |
| 112 | + if err != nil { |
| 113 | + return nil, "", nil, err |
| 114 | + } |
| 115 | + resources = append(resources, resource) |
| 116 | + } |
| 117 | + |
| 118 | + return resources, "", nil, nil |
| 119 | +} |
| 120 | +``` |
| 121 | + |
| 122 | +**Why:** A cancelled context means "stop now" - the user cancelled or a timeout was reached. Ignoring it wastes API quota and leaves zombie requests running. |
| 123 | + |
| 124 | +--- |
| 125 | + |
| 126 | +## Don't Swallow Errors |
| 127 | + |
| 128 | +**Wrong - silent failure:** |
| 129 | +```go |
| 130 | +users, err := client.ListUsers(ctx) |
| 131 | +if err != nil { |
| 132 | + log.Println("error listing users:", err) |
| 133 | + // Continues with empty users - silent data loss! |
| 134 | +} |
| 135 | +``` |
| 136 | + |
| 137 | +**Correct - propagate error:** |
| 138 | +```go |
| 139 | +users, err := client.ListUsers(ctx) |
| 140 | +if err != nil { |
| 141 | + return nil, "", nil, fmt.Errorf("baton-myservice: failed to list users: %w", err) |
| 142 | +} |
| 143 | +``` |
| 144 | + |
| 145 | +--- |
| 146 | + |
| 147 | +## Partial Success Handling |
| 148 | + |
| 149 | +**For sync (fail fast):** |
| 150 | +```go |
| 151 | +for _, item := range items { |
| 152 | + if err := process(item); err != nil { |
| 153 | + return err // Stop on first error |
| 154 | + } |
| 155 | +} |
| 156 | +``` |
| 157 | + |
| 158 | +**For provisioning (collect errors):** |
| 159 | +```go |
| 160 | +var errs []error |
| 161 | +for _, item := range items { |
| 162 | + if err := process(item); err != nil { |
| 163 | + errs = append(errs, fmt.Errorf("item %s: %w", item.ID, err)) |
| 164 | + } |
| 165 | +} |
| 166 | +if len(errs) > 0 { |
| 167 | + return errors.Join(errs...) |
| 168 | +} |
| 169 | +``` |
| 170 | + |
| 171 | +--- |
| 172 | + |
| 173 | +## Error Message Quality |
| 174 | + |
| 175 | +**Bad - no context:** |
| 176 | +```go |
| 177 | +return fmt.Errorf("failed") |
| 178 | +``` |
| 179 | + |
| 180 | +**Bad - redundant "error":** |
| 181 | +```go |
| 182 | +return fmt.Errorf("error: failed to list users") |
| 183 | +``` |
| 184 | + |
| 185 | +**Good - specific and actionable:** |
| 186 | +```go |
| 187 | +return fmt.Errorf("baton-myservice: failed to list users (page %d): %w", page, err) |
| 188 | +``` |
| 189 | + |
| 190 | +Every error message should include: |
| 191 | +- Connector name |
| 192 | +- Action being performed |
| 193 | +- Relevant IDs/context |
| 194 | +- Original error via %w |
0 commit comments