|
| 1 | +# build-pagination |
| 2 | + |
| 3 | +Pagination is critical. Enterprise environments have tens of thousands of users. |
| 4 | + |
| 5 | +--- |
| 6 | + |
| 7 | +## Why Pagination Matters |
| 8 | + |
| 9 | +- SDK checkpoints every 10 seconds |
| 10 | +- Without pagination, interrupted syncs restart from zero |
| 11 | +- Without pagination, large datasets can exhaust memory |
| 12 | +- API rate limits are easier to handle page-by-page |
| 13 | + |
| 14 | +**Always implement pagination, even for small datasets in testing.** |
| 15 | + |
| 16 | +--- |
| 17 | + |
| 18 | +## pagination.Bag Pattern |
| 19 | + |
| 20 | +```go |
| 21 | +func (u *userBuilder) List(ctx context.Context, parentID *v2.ResourceId, |
| 22 | + token *pagination.Token) ([]*v2.Resource, string, annotations.Annotations, error) { |
| 23 | + |
| 24 | + // 1. Unmarshal the bag |
| 25 | + bag := &pagination.Bag{} |
| 26 | + if err := bag.Unmarshal(token.Token); err != nil { |
| 27 | + return nil, "", nil, err |
| 28 | + } |
| 29 | + |
| 30 | + // 2. Initialize on first call |
| 31 | + if bag.Current() == nil { |
| 32 | + bag.Push(pagination.PageState{ |
| 33 | + ResourceTypeID: userResourceType.Id, |
| 34 | + }) |
| 35 | + } |
| 36 | + |
| 37 | + // 3. Get current page token |
| 38 | + pageToken := bag.PageToken() |
| 39 | + |
| 40 | + // 4. Fetch one page from API |
| 41 | + users, nextCursor, err := u.client.ListUsers(ctx, pageToken, 100) |
| 42 | + if err != nil { |
| 43 | + return nil, "", nil, err |
| 44 | + } |
| 45 | + |
| 46 | + // 5. Process results |
| 47 | + var resources []*v2.Resource |
| 48 | + for _, user := range users { |
| 49 | + // ... create resources |
| 50 | + } |
| 51 | + |
| 52 | + // 6. Create next token |
| 53 | + nextPage, err := bag.NextToken(nextCursor) |
| 54 | + if err != nil { |
| 55 | + return nil, "", nil, err |
| 56 | + } |
| 57 | + |
| 58 | + return resources, nextPage, nil, nil |
| 59 | +} |
| 60 | +``` |
| 61 | + |
| 62 | +--- |
| 63 | + |
| 64 | +## Two Failure Modes (Both Critical) |
| 65 | + |
| 66 | +### 1. Early Termination - Misses Data |
| 67 | + |
| 68 | +```go |
| 69 | +// WRONG - always stops after first page |
| 70 | +func (u *userBuilder) List(...) (...) { |
| 71 | + users, _, _ := client.ListUsers(ctx, pageToken, 100) |
| 72 | + // ... process users ... |
| 73 | + return resources, "", nil, nil // Always empty token! |
| 74 | +} |
| 75 | +``` |
| 76 | + |
| 77 | +**Result**: Only first page synced. Silent data loss. |
| 78 | + |
| 79 | +### 2. Infinite Loop - Never Stops |
| 80 | + |
| 81 | +```go |
| 82 | +// WRONG - hardcoded token |
| 83 | +return resources, "next", nil, nil // Always returns "next" |
| 84 | + |
| 85 | +// WRONG - result count termination |
| 86 | +for { |
| 87 | + results, _ := client.List(offset) |
| 88 | + if len(results) < pageSize { |
| 89 | + break // Fires on any short page, including an empty one |
| 90 | + } |
| 91 | + offset += pageSize // Runs forever if the API never returns a short page (e.g. it ignores offset) |
| 92 | +} |
| 93 | +``` |
| 94 | + |
| 95 | +**Result**: Sync hangs. Resource exhaustion. This pattern has required 5 production fixes. |
| 96 | + |
| 97 | +### Correct Pattern - Token Passthrough |
| 98 | + |
| 99 | +```go |
| 100 | +// CORRECT - pass through API's token |
| 101 | +func (u *userBuilder) List(...) (...) { |
| 102 | + users, nextCursor, err := client.ListUsers(ctx, pageToken, 100) |
| 103 | + if err != nil { |
| 104 | + return nil, "", nil, err |
| 105 | + } |
| 106 | + |
| 107 | + // ... process users ... |
| 108 | + |
| 109 | + // Pass through exactly what API returned |
| 110 | + nextPage, err := bag.NextToken(nextCursor) |
| 111 | + if err != nil { |
| 112 | + return nil, "", nil, err |
| 113 | + } |
| 114 | + |
| 115 | + return resources, nextPage, nil, nil |
| 116 | +} |
| 117 | +``` |
| 118 | + |
| 119 | +**Key**: `nextCursor` comes from the API response. When the API has no more pages, it returns an empty string. Pass that through unchanged. |
| 120 | + |
| 121 | +--- |
| 122 | + |
| 123 | +## Choosing Pagination Strategy |
| 124 | + |
| 125 | +**Read the API docs first.** Using the wrong strategy causes bugs. |
| 126 | + |
| 127 | +| API Signal | Strategy | Notes | |
| 128 | +|------------|----------|-------| |
| 129 | +| Returns `next_cursor`, `cursor`, `page_token` | Cursor | Preferred. Opaque token. | |
| 130 | +| Returns `Link` header with `rel="next"` | Link header | GitHub, some REST APIs | |
| 131 | +| Returns `total_count` and supports `offset` | Offset | Requires math. Error-prone. | |
| 132 | +| Returns `has_more` boolean | Cursor variant | Use with cursor or offset | |
| 133 | +| No pagination info returned | Check docs! | API may have undocumented pagination | |
| 134 | + |
| 135 | +### Cursor-based (preferred) |
| 136 | +```go |
| 137 | +// API: /users?cursor=abc123 |
| 138 | +resp, err := client.ListUsers(ctx, cursor, pageSize) |
| 139 | +nextCursor := resp.NextCursor // Use directly - opaque token |
| 140 | +``` |
| 141 | + |
| 142 | +### Offset-based (error-prone) |
| 143 | +```go |
| 144 | +// API: /users?offset=100&limit=50 |
| 145 | +// Must track offset yourself |
| 146 | +type offsetToken struct { |
| 147 | + Offset int `json:"offset"` |
| 148 | +} |
| 149 | + |
| 150 | +// DANGER: If items are added/removed during sync, you skip or duplicate items |
| 151 | +// Only use if API doesn't support cursor |
| 152 | +``` |
| 153 | + |
| 154 | +### Link header (GitHub style) |
| 155 | +```go |
| 156 | +// Parse Link header for "next" URL |
| 157 | +linkHeader := resp.Header.Get("Link") |
| 158 | +nextURL := parseLinkHeader(linkHeader, "next") |
| 159 | +// Extract cursor from URL query params |
| 160 | +``` |
| 161 | + |
| 162 | +### Page number-based (least preferred) |
| 163 | +```go |
| 164 | +// API: /users?page=3&per_page=50 |
| 165 | +// Same problems as offset - avoid if possible |
| 166 | +``` |
| 167 | + |
| 168 | +**When in doubt, use cursor-based pagination if the API supports it.** |
| 169 | + |
| 170 | +--- |
| 171 | + |
| 172 | +## SDK Validation |
| 173 | + |
| 174 | +The SDK validates pagination: |
| 175 | + |
| 176 | +```go |
| 177 | +// SDK checks that page tokens change between calls |
| 178 | +// If same token returned twice, SDK fails with: |
| 179 | +// "next page token is the same as current - connector bug" |
| 180 | +``` |
| 181 | + |
| 182 | +This catches infinite loops caused by returning the same token on every call, such as a hardcoded token. |
| 183 | + |
| 184 | +--- |
| 185 | + |
| 186 | +## Multi-Resource Pagination |
| 187 | + |
| 188 | +For `Grants()` implementations that traverse multiple resource types: |
| 189 | + |
| 190 | +```go |
| 191 | +func (g *groupBuilder) Grants(ctx context.Context, resource *v2.Resource, |
| 192 | + token *pagination.Token) ([]*v2.Grant, string, annotations.Annotations, error) { |
| 193 | + |
| 194 | + bag := &pagination.Bag{} |
| 195 | + if err := bag.Unmarshal(token.Token); err != nil { |
| 196 | + return nil, "", nil, err |
| 197 | + } |
| 198 | + |
| 199 | + // Initialize with multiple pages to traverse. The bag is a stack: the last state pushed is processed first. |
| 200 | + if bag.Current() == nil { |
| 201 | + bag.Push(pagination.PageState{ResourceTypeID: "direct_members"}) |
| 202 | + bag.Push(pagination.PageState{ResourceTypeID: "nested_groups"}) |
| 203 | + } |
| 204 | + |
| 205 | + switch bag.ResourceTypeID() { |
| 206 | + case "direct_members": |
| 207 | + // Handle direct members (processed second) |
| 208 | + // This is the last state on the stack; when done, nothing is left to pop |
| 209 | + case "nested_groups": |
| 210 | + // Handle nested groups (processed first); when done, bag.Next() pops to "direct_members" |
| 211 | + } |
| 212 | + |
| 213 | + // ... |
| 214 | +} |
| 215 | +``` |
| 216 | + |
| 217 | +--- |
| 218 | + |
| 219 | +## Page Size Selection |
| 220 | + |
| 221 | +| API Limit | Recommendation | |
| 222 | +|-----------|----------------| |
| 223 | +| No limit | Use 100-200 | |
| 224 | +| Has limit | Use slightly under limit | |
| 225 | +| Rate limited | Smaller pages, more frequent checkpoints | |
| 226 | + |
| 227 | +**Don't hardcode arbitrary sizes:** |
| 228 | +```go |
| 229 | +// WRONG |
| 230 | +const pageSize = 10 // Too small, many API calls |
| 231 | +const pageSize = 10000 // Too large, memory pressure |
| 232 | + |
| 233 | +// CORRECT - check API docs |
| 234 | +const pageSize = 200 // Google Workspace limit |
| 235 | +const pageSize = 100 // GitHub API recommendation |
| 236 | +``` |
0 commit comments