Skip to content

Commit 19596ed

Browse files
committed
Log redactor upgrade with Shannon entropy
1 parent 3706fc0 commit 19596ed

File tree

5 files changed

+1208
-308
lines changed

5 files changed

+1208
-308
lines changed

README.md

Lines changed: 77 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ class StripePaymentService
108108

109109
### Controlled Execution Blocks
110110

111-
**What it does:** Monitors critical operations with automatic start/end logging, exception handling, circuit breakers, and failure callbacks.
111+
**What it does:** Monitors critical operations with automatic start/end logging, exception handling, DB transactions, circuit breakers, and failure callbacks.
112112

113113
```php
114114
use Kirschbaum\Monitor\Facades\Monitor;
@@ -118,14 +118,9 @@ class PaymentService
118118
public function processPayment($amount, $userId)
119119
{
120120
return Monitor::controlled('payment_processing')
121-
->context(['amount' => $amount, 'user_id' => $userId])
122-
->failing(function ($exception, $context) {
123-
// Alert ops team immediately
124-
NotificationService::alertOps('Payment failure', $context);
125-
126-
// Open circuit breaker
127-
CircuitBreaker::open('payment_service', '5 minutes');
128-
})
121+
->with(['amount' => $amount, 'user_id' => $userId]) // Additional log context
122+
->transactioned(3)
123+
->escalated(fn (ControlledFailureMeta $meta) => Slack::notify($meta->toArray()))
129124
->run(function () use ($amount) {
130125
return $this->chargeCard($amount);
131126
});
@@ -145,9 +140,9 @@ class PaymentService
145140
```
146141

147142
**Advanced Features:**
148-
- **Circuit Breakers:** `->breaker('service_name', threshold, decaySeconds)`
149-
- **Database Transactions:** `->transactioned(retries, onlyExceptions, excludeExceptions)`
150-
- **Failure Escalation:** `->escalated($callback)` for critical business processes
143+
- **Circuit Breakers:** `->breaker('service_name', threshold, decaySeconds)` - Automatically opens/closes breaker based on failures
144+
- **Database Transactions:** `->transactioned(retries, onlyExceptions, excludeExceptions)` - Lets you retry the transaction only on specific exceptions, or ignore specific exceptions.
145+
- **Failure Escalation Path:** `->escalated($callback)` for critical business processes
151146

152147
### Distributed Tracing
153148

@@ -250,39 +245,90 @@ class DataProcessor
250245

251246
### Log Redaction
252247

253-
**What it does:** Automatically scrubs sensitive data from log context to ensure compliance and security.
248+
**What it does:** Automatically scrubs sensitive data from log context using a priority-based system to ensure compliance and security while preserving important data.
249+
250+
**Priority System:**
251+
1. **Safe Keys** (highest) - Never redacted, always shown
252+
2. **Blocked Keys** - Always redacted, regardless of content
253+
3. **Regex Patterns** - Redacts values matching specific patterns
254+
4. **Shannon Entropy** (lowest) - Detects high-entropy secrets like API keys
254255

255256
**Configuration:** Redaction options in `config/monitor.php`:
256257

257258
```php
258259
'log_redactor' => [
259260
'enabled' => true,
260-
'redact_keys' => [
261-
'password', 'token', 'api_key', 'authorization',
262-
'ssn', 'credit_card', 'private_key'
261+
262+
// Priority 1: Keys that should NEVER be redacted
263+
'safe_keys' => [
264+
'id', 'uuid', 'created_at', 'updated_at', 'timestamp',
265+
'user_id', 'order_id', 'status', 'type', 'name'
266+
],
267+
268+
// Priority 2: Keys that should ALWAYS be redacted
269+
'blocked_keys' => [
270+
'password', 'token', 'api_key', 'authorization', 'secret',
271+
'ssn', 'ein', 'credit_card', 'private_key', 'email'
263272
],
273+
274+
// Priority 3: Regex patterns for value-based detection
264275
'patterns' => [
265276
'email' => '/[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+/',
266277
'credit_card' => '/\b(?:\d[ -]*?){13,16}\b/',
267-
'bearer_token' => '/Bearer\s+[A-Za-z0-9\-._~+\/]+=*/',
278+
'ssn' => '/\b\d{3}-?\d{2}-?\d{4}\b/',
279+
'phone' => '/\b\d{3}[.-]?\d{3}[.-]?\d{4}\b/',
280+
],
281+
282+
// Priority 4: Shannon entropy detection for unknown secrets
283+
'shannon_entropy' => [
284+
'enabled' => true,
285+
'threshold' => 4.5, // Entropy threshold (0-8 scale)
286+
'min_length' => 20, // Minimum string length to analyze
268287
],
288+
269289
'replacement' => '[REDACTED]',
290+
'mark_redacted' => true, // Add "_redacted": true marker
270291
'max_value_length' => 10000, // Truncate large values
271292
'redact_large_objects' => true, // Limit large arrays/objects
272293
'max_object_size' => 50,
273294
],
274295
```
275296

276-
**What happens:**
297+
**How it works:**
277298
```php
278299
Monitor::from($this)->info('User data', [
279-
'email' => 'user@example.com', // → '[REDACTED]'
280-
'password' => 'secret123', // → '[REDACTED]'
281-
'token' => 'Bearer abc123', // → '[REDACTED]'
282-
'name' => 'John Doe' // → 'John Doe' (unchanged)
300+
// Safe keys - never redacted (Priority 1)
301+
'id' => 123, // → 123 (safe key)
302+
'user_id' => 456, // → 456 (safe key)
303+
'created_at' => '2024-01-15', // → '2024-01-15' (safe key)
304+
305+
// Blocked keys - always redacted (Priority 2)
306+
'password' => 'secret123', // → '[REDACTED]' (blocked key)
307+
'email' => 'user@example.com', // → '[REDACTED]' (blocked key wins over pattern)
308+
309+
// Pattern matching - value-based (Priority 3)
310+
'contact' => 'user@example.com', // → '[REDACTED]' (email pattern)
311+
'card' => '4111-1111-1111-1111', // → '[REDACTED]' (credit card pattern)
312+
313+
// Shannon entropy - high entropy secrets (Priority 4)
314+
'api_token' => 'sk-1234567890abcdef...', // → '[REDACTED]' (high entropy)
315+
'jwt' => 'eyJ0eXAiOiJKV1QiLCJhbGc...', // → '[REDACTED]' (high entropy)
316+
317+
// Normal data - unchanged
318+
'name' => 'John Doe', // → 'John Doe' (low entropy, not blocked)
319+
'description' => 'A simple task', // → 'A simple task' (normal text)
283320
]);
321+
322+
// Result includes redaction marker when data was modified
323+
// { ..., "_redacted": true }
284324
```
285325

326+
**Shannon Entropy Detection:**
327+
- Automatically detects API keys, JWT tokens, and other high-entropy secrets
328+
- Ignores common patterns like URLs, UUIDs, dates, and file paths
329+
- Configurable threshold and minimum length requirements
330+
- Prevents false positives on normal text and structured data
331+
286332
## Configuration
287333

288334
**Environment Variables:**
@@ -304,6 +350,15 @@ MONITOR_TRACE_HEADER=X-Trace-Id
304350
# Log redaction
305351
MONITOR_LOG_REDACTOR_ENABLED=true
306352
MONITOR_LOG_REDACTOR_REPLACEMENT='[REDACTED]'
353+
MONITOR_LOG_REDACTOR_MARK_REDACTED=true
354+
MONITOR_LOG_REDACTOR_MAX_VALUE_LENGTH=10000
355+
MONITOR_LOG_REDACTOR_LARGE_OBJECTS=true
356+
MONITOR_LOG_REDACTOR_MAX_OBJECT_SIZE=50
357+
358+
# Shannon entropy detection
359+
MONITOR_LOG_REDACTOR_SHANNON_ENABLED=true
360+
MONITOR_LOG_REDACTOR_SHANNON_THRESHOLD=4.5
361+
MONITOR_LOG_REDACTOR_SHANNON_MIN_LENGTH=20
307362
```
308363

309364
**Logging Channel:** Configure a dedicated Monitor logging channel:

composer.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
{
2-
"name": "kirschbaum-development/monitor",
2+
"name": "kirschbaum/monitor",
33
"description": "Laravel observability toolkit with critical control points, structured logging, performance timing, and trace context.",
44
"type": "library",
55
"license": "MIT",

config/monitor.php

Lines changed: 125 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -279,30 +279,97 @@
279279

280280
/*
281281
|----------------------------------------------------------------------
282-
| Redact Keys
282+
| Safe Keys
283283
|----------------------------------------------------------------------
284284
|
285-
| Keys that always get redacted (case-insensitive match). If any
286-
| context key matches these names, the entire value will be replaced
287-
| with the configured replacement string.
285+
| Keys that should NEVER be redacted (case-insensitive match).
286+
| These keys will always show their values unredacted, regardless
287+
| of other redaction rules. Useful for identifiers and timestamps.
288288
|
289289
*/
290290

291-
'redact_keys' => [
291+
'safe_keys' => [
292+
// Core identifiers (high frequency)
293+
'id',
294+
'uuid',
295+
'user_id',
296+
'order_id',
297+
'session_id',
298+
'request_id',
299+
300+
// Timestamps & metadata (high frequency)
301+
'created_at',
302+
'updated_at',
303+
'timestamp',
304+
305+
// Monitor framework keys (highest frequency)
306+
'level',
307+
'event',
308+
'message',
309+
'trace_id',
310+
'channel',
311+
'duration_ms',
312+
'memory_mb',
313+
314+
// Controlled block keys (frequent in enterprise usage)
315+
'controlled_block',
316+
'controlled_block_id',
317+
'attempt',
318+
'status',
319+
'breaker_tripped',
320+
'escalated',
321+
322+
// Common business identifiers
323+
'name',
324+
'title',
325+
'type',
326+
'method',
327+
'path',
328+
'url',
329+
'ip',
330+
'user_agent',
331+
'operation',
332+
'action',
333+
'source',
334+
'target',
335+
'version',
336+
'platform',
337+
'environment',
338+
],
339+
340+
/*
341+
|----------------------------------------------------------------------
342+
| Blocked Keys
343+
|----------------------------------------------------------------------
344+
|
345+
| Keys that should ALWAYS be redacted (case-insensitive match).
346+
| These keys will always be redacted, even if they would normally
347+
| be considered safe. Takes priority over safe_keys.
348+
|
349+
*/
350+
351+
'blocked_keys' => [
292352
'password',
293-
'token',
294353
'secret',
354+
'token',
295355
'api_key',
296356
'authorization',
297-
'ssn',
298-
'credit_card',
299357
'auth_token',
300358
'bearer_token',
301359
'access_token',
302360
'refresh_token',
303361
'session_id',
304362
'private_key',
305363
'client_secret',
364+
'email',
365+
'ssn',
366+
'ein',
367+
'social_security_number',
368+
'tax_id',
369+
'credit_card',
370+
'card_number',
371+
'cvv',
372+
'pin',
306373
],
307374

308375
/*
@@ -317,15 +384,16 @@
317384
*/
318385

319386
'patterns' => [
387+
// Ordered by frequency and performance (most common/fastest first)
320388
'email' => '/[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+/',
389+
'phone_simple' => '/\b\d{3}[.-]?\d{3}[.-]?\d{4}\b/',
390+
'ssn' => '/\b\d{3}-?\d{2}-?\d{4}\b/',
321391
'credit_card' => '/\b(?:\d[ -]*?){13,16}\b/',
322-
'ssn' => '/\b\d{3}-\d{2}-\d{4}\b/',
323-
'phone' => '/\+?\d[\d -]{8,14}\d/',
324-
'bearer_token' => '/Bearer\s+[A-Za-z0-9\-._~+\/]+=*/',
325-
'api_key' => '/(api|apikey|api_key)\s*[:=]\s*[A-Za-z0-9\-_]{20,}/i',
326-
'jwt_token' => '/eyJ[A-Za-z0-9\-_]+\.eyJ[A-Za-z0-9\-_]+\.[A-Za-z0-9\-_.+\/=]*/',
327-
'ipv4' => '/\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b/',
328392
'url_with_auth' => '/https?:\/\/[^:\/\s]+:[^@\/\s]+@[^\s]+/',
393+
394+
// Note: IPv4 removed (common pattern bypass handles this)
395+
// Note: Complex phone patterns removed (Shannon entropy will catch international numbers)
396+
// Note: API key patterns removed (Shannon entropy is better for these)
329397
],
330398

331399
/*
@@ -360,10 +428,13 @@
360428
| Maximum length for string values before they are considered "large
361429
| blobs" and get redacted. Set to null to disable length-based redaction.
362430
| This helps prevent large payloads from cluttering logs.
431+
| 20000: Performance optimized (allows larger strings, less truncation overhead)
432+
| 10000: Balanced approach
433+
| 5000: More aggressive truncation (better for storage-constrained environments)
363434
|
364435
*/
365436

366-
'max_value_length' => env('MONITOR_LOG_REDACTOR_MAX_VALUE_LENGTH', 10000),
437+
'max_value_length' => env('MONITOR_LOG_REDACTOR_MAX_VALUE_LENGTH', 20000),
367438

368439
/*
369440
|----------------------------------------------------------------------
@@ -385,9 +456,47 @@
385456
|
386457
| Maximum number of items in an array or object before it gets redacted.
387458
| Only applies when redact_large_objects is enabled.
459+
| 100: Performance optimized (allows larger objects, less redaction overhead)
460+
| 50: Balanced approach
461+
| 25: More aggressive redaction (better for memory-constrained environments)
388462
|
389463
*/
390464

391-
'max_object_size' => env('MONITOR_LOG_REDACTOR_MAX_OBJECT_SIZE', 50),
465+
'max_object_size' => env('MONITOR_LOG_REDACTOR_MAX_OBJECT_SIZE', 100),
466+
467+
/*
468+
|----------------------------------------------------------------------
469+
| Shannon Entropy Configuration
470+
|----------------------------------------------------------------------
471+
|
472+
| Shannon entropy analysis for detecting high-entropy strings like
473+
| API keys, tokens, and secrets that might not match specific patterns.
474+
| This is used as a last resort after safe_keys, blocked_keys, and
475+
| regex patterns have been checked.
476+
|
477+
*/
478+
479+
'shannon_entropy' => [
480+
/*
481+
| Enable Shannon entropy analysis for detecting potential secrets
482+
*/
483+
'enabled' => env('MONITOR_LOG_REDACTOR_SHANNON_ENABLED', true),
484+
485+
/*
486+
| Entropy threshold (0.0 - ~8.0). Higher values = more selective.
487+
| 4.8: Balanced performance/security (recommended for production)
488+
| 4.5: More sensitive detection (better security, more CPU)
489+
| 5.0: Higher performance (fewer false positives, less CPU)
490+
*/
491+
'threshold' => env('MONITOR_LOG_REDACTOR_SHANNON_THRESHOLD', 4.8),
492+
493+
/*
494+
| Minimum string length to analyze. Shorter strings are ignored.
495+
| 25: Performance optimized (recommended for high-volume logging)
496+
| 20: More sensitive detection
497+
| 30: Higher performance, may miss some short tokens
498+
*/
499+
'min_length' => env('MONITOR_LOG_REDACTOR_SHANNON_MIN_LENGTH', 25),
500+
],
392501
],
393502
];

0 commit comments

Comments
 (0)