Skip to content

Commit f32a349

Browse files
authored
Merge pull request #458 from hookdeck/feat/body-logging
feat: improve observability for destinations API 5xx error investigation
2 parents 8683052 + 7b5182e commit f32a349

File tree

9 files changed

+1232
-11
lines changed

9 files changed

+1232
-11
lines changed

.plans/redis-error/README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
This could be useful if we see other errors. However, if we don't, this directory should be removed.
Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
#!/bin/bash
2+
3+
# Script to reproduce Redis EXECABORT error by causing marshaling failures
4+
# in destination.Credentials.MarshalBinary() during Redis transaction
5+
6+
set -e
7+
8+
# Configuration
9+
BASE_URL="http://localhost:3333/api/v1"
10+
TENANT_ID="test-tenant"
11+
API_KEY="apikey" # Replace with actual API key
12+
13+
echo "=== Redis EXECABORT Reproduction Script ==="
14+
echo "This script creates API requests that cause json.Marshal() failures"
15+
echo "in destination.Credentials.MarshalBinary() during Redis transactions"
16+
echo ""
17+
18+
# Function to make API request
19+
make_request() {
20+
local payload="$1"
21+
local description="$2"
22+
23+
echo "Testing: $description"
24+
echo "Payload: $payload"
25+
echo ""
26+
27+
curl -X POST \
28+
-H "Content-Type: application/json" \
29+
-H "Authorization: Bearer $API_KEY" \
30+
-d "$payload" \
31+
"$BASE_URL/$TENANT_ID/destinations" \
32+
-v 2>&1 | head -20
33+
34+
echo ""
35+
echo "---"
36+
echo ""
37+
}
38+
39+
# Test Case 1: Invalid UTF-8 sequences in credentials
40+
# JSON marshaling fails when encountering invalid UTF-8 bytes
41+
echo "=== Test Case 1: Invalid UTF-8 in credentials ==="
42+
PAYLOAD_UTF8='{
43+
"type": "webhook",
44+
"topics": ["test.event"],
45+
"config": {
46+
"url": "https://example.com/webhook"
47+
},
48+
"credentials": {
49+
"secret": "invalid_utf8_\xff\xfe\xfd",
50+
"key": "test\x00null_byte"
51+
}
52+
}'
53+
make_request "$PAYLOAD_UTF8" "Invalid UTF-8 sequences in credentials"
54+
55+
# Test Case 2: Extremely large credential values that may cause memory issues
56+
echo "=== Test Case 2: Oversized credential values ==="
57+
LARGE_STRING=$(printf 'A%.0s' {1..1048576}) # 1MB string
58+
PAYLOAD_LARGE='{
59+
"type": "webhook",
60+
"topics": ["test.event"],
61+
"config": {
62+
"url": "https://example.com/webhook"
63+
},
64+
"credentials": {
65+
"secret": "'"$LARGE_STRING"'",
66+
"large_data": "'"$LARGE_STRING"'"
67+
}
68+
}'
69+
make_request "$PAYLOAD_LARGE" "Oversized credential values (1MB each)"
70+
71+
# Test Case 3: Credentials containing control characters
72+
echo "=== Test Case 3: Control characters in credentials ==="
73+
PAYLOAD_CONTROL='{
74+
"type": "webhook",
75+
"topics": ["test.event"],
76+
"config": {
77+
"url": "https://example.com/webhook"
78+
},
79+
"credentials": {
80+
"secret": "control_chars_\u0000\u0001\u0002\u001f",
81+
"data": "tabs_and_newlines\t\n\r\f\b"
82+
}
83+
}'
84+
make_request "$PAYLOAD_CONTROL" "Control characters in credentials"
85+
86+
# Test Case 4: Credentials with nested JSON that becomes malformed when stringified
87+
echo "=== Test Case 4: Malformed nested JSON structures ==="
88+
PAYLOAD_NESTED='{
89+
"type": "webhook",
90+
"topics": ["test.event"],
91+
"config": {
92+
"url": "https://example.com/webhook"
93+
},
94+
"credentials": {
95+
"secret": "whsec_abc123",
96+
"nested_json": "{\"incomplete\": \"json\", \"missing\":",
97+
"circular": "self_reference"
98+
}
99+
}'
100+
make_request "$PAYLOAD_NESTED" "Malformed nested JSON in credentials"
101+
102+
# Test Case 5: Very deep nested object structures (potential stack overflow)
103+
echo "=== Test Case 5: Extremely deep nesting ==="
104+
DEEP_JSON=""
105+
for i in {1..1000}; do
106+
DEEP_JSON="{\"level$i\":$DEEP_JSON}"
107+
done
108+
DEEP_JSON="$DEEP_JSON$(printf '}%.0s' {1..1000})"
109+
110+
PAYLOAD_DEEP='{
111+
"type": "webhook",
112+
"topics": ["test.event"],
113+
"config": {
114+
"url": "https://example.com/webhook"
115+
},
116+
"credentials": {
117+
"secret": "whsec_abc123",
118+
"deep_structure": "'"$DEEP_JSON"'"
119+
}
120+
}'
121+
make_request "$PAYLOAD_DEEP" "Extremely deep nested structure"
122+
123+
echo "=== Test Complete ==="
124+
echo ""
125+
echo "The above requests are designed to trigger json.Marshal() failures during"
126+
echo "the Redis transaction in entity.go:292, causing EXECABORT errors."
127+
echo ""
128+
echo "Expected behavior:"
129+
echo "- Invalid UTF-8 should cause json.Marshal to fail with invalid UTF-8 error"
130+
echo "- Oversized payloads may cause memory allocation failures"
131+
echo "- Control characters may cause marshaling issues"
132+
echo "- Malformed JSON strings may cause parsing issues during unmarshaling"
133+
echo "- Deep nesting may cause stack overflow during marshaling"
134+
echo ""
135+
echo "Monitor Redis logs and application logs for EXECABORT errors and"
136+
echo "marshaling failure messages."
Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
# Redis EXECABORT Reproduction Guide
2+
3+
Based on analysis of the codebase, the Redis EXECABORT error occurs in [`entity.go:292`](internal/models/entity.go:292) when `destination.Credentials.MarshalBinary()` fails during the Redis transaction.
4+
5+
## Root Cause
6+
7+
The failure path is:
8+
1. POST request to `/api/v1/{tenant_id}/destinations`
9+
2. [`destination_handlers.go:99`](internal/services/api/destination_handlers.go:99) calls `h.entityStore.CreateDestination()`
10+
3. [`entity.go:291`](internal/models/entity.go:291) starts Redis transaction with `TxPipelined()`
11+
4. [`entity.go:292`](internal/models/entity.go:292) calls `destination.Credentials.MarshalBinary()`
12+
5. [`destination.go:176`](internal/models/destination.go:176) calls `json.Marshal(m)` on the credentials map
13+
6. If `json.Marshal()` fails, the Redis transaction aborts with EXECABORT
14+
15+
## Most Effective Reproduction Methods
16+
17+
### Method 1: Invalid UTF-8 Characters (MOST LIKELY TO SUCCEED)
18+
19+
```bash
20+
curl -X POST \
21+
-H "Content-Type: application/json" \
22+
-H "Authorization: Bearer apiKey" \
23+
-d '{
24+
"type": "webhook",
25+
"topics": ["test.event"],
26+
"config": {
27+
"url": "https://example.com/webhook"
28+
},
29+
"credentials": {
30+
"secret": "invalid_utf8_\xFF\xFE\xFD",
31+
"key": "contains\x00null\x01bytes"
32+
}
33+
}' \
34+
http://localhost:3333/api/v1/test-tenant/destinations
35+
```
36+
37+
### Method 2: Non-Printable Control Characters
38+
39+
```bash
40+
curl -X POST \
41+
-H "Content-Type: application/json" \
42+
-H "Authorization: Bearer apiKey" \
43+
-d '{
44+
"type": "webhook",
45+
"topics": ["test.event"],
46+
"config": {
47+
"url": "https://example.com/webhook"
48+
},
49+
"credentials": {
50+
"secret": "control_chars_\u0000\u0001\u0002\u001f",
51+
"data": "bell\u0007backspace\u0008"
52+
}
53+
}' \
54+
http://localhost:3333/api/v1/test-tenant/destinations
55+
```
56+
57+
### Method 3: Oversized Payload (Memory Exhaustion)
58+
59+
```bash
60+
# Create a very large credential value
61+
LARGE_VALUE=$(python3 -c "print('A' * 10485760)") # 10MB string
62+
63+
curl -X POST \
64+
-H "Content-Type: application/json" \
65+
-H "Authorization: Bearer apiKey" \
66+
-d '{
67+
"type": "webhook",
68+
"topics": ["test.event"],
69+
"config": {
70+
"url": "https://example.com/webhook"
71+
},
72+
"credentials": {
73+
"secret": "whsec_abc123",
74+
"large_data": "'$LARGE_VALUE'"
75+
}
76+
}' \
77+
http://localhost:3333/api/v1/test-tenant/destinations
78+
```
79+
80+
## Expected Results
81+
82+
When `json.Marshal()` fails in the Redis transaction, you should see:
83+
84+
1. **In Application Logs:**
85+
- Error from `json.Marshal()` (e.g., "invalid UTF-8 in string")
86+
- Redis transaction failure
87+
- 500 Internal Server Error returned to client
88+
89+
2. **In Redis Logs:**
90+
- `EXECABORT` message
91+
- Transaction rollback
92+
93+
3. **Client Response:**
94+
- HTTP 500 status code
95+
- Internal server error response
96+
97+
## Why These Methods Work
98+
99+
- **Invalid UTF-8**: Go's `json.Marshal()` validates UTF-8 and fails on invalid sequences
100+
- **Control Characters**: Some control chars cannot be marshaled to valid JSON
101+
- **Oversized Payloads**: May cause memory allocation failures during marshaling
102+
103+
The key insight is that since `Credentials` is a `map[string]string`, the marshaling failure must come from the string values themselves being unmarshalable, not from complex object structures.

internal/models/entity.go

Lines changed: 15 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -288,19 +288,24 @@ func (m *entityStoreImpl) CreateDestination(ctx context.Context, destination Des
288288

289289
func (m *entityStoreImpl) UpsertDestination(ctx context.Context, destination Destination) error {
290290
key := redisDestinationID(destination.ID, destination.TenantID)
291-
_, err := m.redisClient.TxPipelined(ctx, func(r redis.Pipeliner) error {
292-
credentialsBytes, err := destination.Credentials.MarshalBinary()
293-
if err != nil {
294-
return err
295-
}
296-
encryptedCredentials, err := m.cipher.Encrypt(credentialsBytes)
297-
if err != nil {
298-
return err
299-
}
291+
292+
// Pre-marshal and encrypt credentials BEFORE starting Redis transaction
293+
// This isolates marshaling failures from Redis transaction failures
294+
credentialsBytes, err := destination.Credentials.MarshalBinary()
295+
if err != nil {
296+
return fmt.Errorf("invalid destination credentials: %w", err)
297+
}
298+
encryptedCredentials, err := m.cipher.Encrypt(credentialsBytes)
299+
if err != nil {
300+
return fmt.Errorf("failed to encrypt destination credentials: %w", err)
301+
}
302+
303+
// All marshaling and encryption successful - now perform Redis operations
304+
_, err = m.redisClient.TxPipelined(ctx, func(r redis.Pipeliner) error {
300305
// Support overriding deleted resources
301306
r.Persist(ctx, key)
302307
r.HDel(ctx, key, "deleted_at")
303-
// Set the new destination values
308+
// Set the new destination values with pre-processed data
304309
r.HSet(ctx, key, "id", destination.ID)
305310
r.HSet(ctx, key, "type", destination.Type)
306311
r.HSet(ctx, key, "topics", &destination.Topics)

internal/services/api/logger_middleware.go

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,10 @@ import (
1414
)
1515

1616
func LoggerMiddleware(logger *logging.Logger) gin.HandlerFunc {
17+
return LoggerMiddlewareWithSanitizer(logger, nil)
18+
}
19+
20+
func LoggerMiddlewareWithSanitizer(logger *logging.Logger, sanitizer *RequestBodySanitizer) gin.HandlerFunc {
1721
return func(c *gin.Context) {
1822
// Skip logging portal requests
1923
if !strings.Contains(c.Request.URL.Path, "api/v") {
@@ -22,6 +26,19 @@ func LoggerMiddleware(logger *logging.Logger) gin.HandlerFunc {
2226
}
2327

2428
logger := logger.Ctx(c.Request.Context()).WithOptions(zap.AddStacktrace(zap.FatalLevel))
29+
30+
// Buffer request body if we have a sanitizer and this might be a destination request
31+
var bufferedBody *BufferedReader
32+
var requestBodyFields []zap.Field
33+
34+
if sanitizer != nil && shouldBufferRequestBody(c) {
35+
if br, err := NewBufferedReader(c.Request.Body); err == nil {
36+
bufferedBody = br
37+
// Replace the request body with a new reader so the handler can still read it
38+
c.Request.Body = br.NewReadCloser()
39+
}
40+
}
41+
2542
c.Next()
2643

2744
fields := []zap.Field{}
@@ -30,6 +47,12 @@ func LoggerMiddleware(logger *logging.Logger) gin.HandlerFunc {
3047
fields = append(fields, queryFields(c)...)
3148
fields = append(fields, errorFields(c)...)
3249

50+
// Add sanitized request body for 5xx errors
51+
if c.Writer.Status() >= 500 && bufferedBody != nil {
52+
requestBodyFields = getRequestBodyFields(bufferedBody, sanitizer)
53+
fields = append(fields, requestBodyFields...)
54+
}
55+
3356
if c.Writer.Status() >= 500 {
3457
logger.Error("request completed", fields...)
3558

@@ -155,3 +178,47 @@ func getErrorWithStackTrace(err error) error {
155178
}
156179
return err
157180
}
181+
182+
// shouldBufferRequestBody determines if we should buffer the request body for potential logging
183+
func shouldBufferRequestBody(c *gin.Context) bool {
184+
// Only buffer POST, PUT, PATCH requests that might contain request bodies
185+
method := c.Request.Method
186+
if method != "POST" && method != "PUT" && method != "PATCH" {
187+
return false
188+
}
189+
190+
// Exclude publish endpoints since they contain user data that shouldn't be logged. We could consider making this configurable in the future.
191+
path := c.Request.URL.Path
192+
if strings.Contains(path, "/publish") {
193+
return false
194+
}
195+
196+
// Buffer all other POST/PUT/PATCH requests for potential 5xx error logging
197+
return true
198+
}
199+
200+
// getRequestBodyFields creates log fields for sanitized request body
201+
func getRequestBodyFields(bufferedBody *BufferedReader, sanitizer *RequestBodySanitizer) []zap.Field {
202+
if bufferedBody == nil || sanitizer == nil {
203+
return []zap.Field{
204+
zap.String("request_body", "[NO_BODY]"),
205+
}
206+
}
207+
208+
sanitizedBody, err := sanitizer.SanitizeRequestBody(bufferedBody.NewReader())
209+
if err != nil {
210+
return []zap.Field{
211+
zap.String("request_body_error", err.Error()),
212+
}
213+
}
214+
215+
if len(sanitizedBody) == 0 {
216+
return []zap.Field{
217+
zap.String("request_body", "[EMPTY_BODY]"),
218+
}
219+
}
220+
221+
return []zap.Field{
222+
zap.String("request_body", string(sanitizedBody)),
223+
}
224+
}

0 commit comments

Comments
 (0)