Skip to content

Commit 6f37c49

Browse files
fix: .NET bug sweep — thread safety, error surfacing, caching, disposal (#252)
- AuditEmitter: surface handler exceptions via HandlerError callback instead of silently swallowing (#147) - SagaOrchestrator: add per-saga lock (SyncRoot) guarding all state mutations during execution and compensation (#145) - GovernanceMiddleware: cache FindMatchingRule lookups in a dictionary keyed by rule name, invalidated on policy count change (#148) - GovernanceKernel: implement IDisposable to clean up Metrics (#150) - AgentIdentity: throw InvalidOperationException instead of returning silent false when private key is missing for HMAC verify (#141) - FileTrustStore: preserve corrupted file as .corrupt backup and surface error via loadErrorHandler callback (#152) Closes #141, closes #145, closes #147, closes #148, closes #150, closes #152 Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent 88edfe2 commit 6f37c49

File tree

7 files changed

+125
-65
lines changed

7 files changed

+125
-65
lines changed

packages/agent-governance-dotnet/src/AgentGovernance/Audit/AuditEmitter.cs

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,12 @@ public sealed class AuditEmitter
2121
/// </summary>
2222
private readonly ConcurrentBag<Action<GovernanceEvent>> _wildcardHandlers = new();
2323

24+
/// <summary>
25+
/// Optional callback invoked when a handler throws an exception.
26+
/// Allows callers to log or monitor handler failures without disrupting other subscribers.
27+
/// </summary>
28+
public Action<Exception, GovernanceEvent>? HandlerError { get; init; }
29+
2430
/// <summary>
2531
/// Subscribes a handler to a specific governance event type.
2632
/// </summary>
@@ -116,19 +122,19 @@ public int HandlerCount(GovernanceEventType type)
116122
public int WildcardHandlerCount => _wildcardHandlers.Count;
117123

118124
/// <summary>
119-
/// Safely invokes a handler, catching and swallowing any exceptions
120-
/// to prevent one faulty handler from disrupting other subscribers.
125+
/// Safely invokes a handler, catching exceptions to prevent one faulty
126+
/// handler from disrupting other subscribers. Exceptions are surfaced
127+
/// through the <see cref="HandlerError"/> callback when configured.
121128
/// </summary>
122-
private static void InvokeSafe(Action<GovernanceEvent> handler, GovernanceEvent governanceEvent)
129+
private void InvokeSafe(Action<GovernanceEvent> handler, GovernanceEvent governanceEvent)
123130
{
124131
try
125132
{
126133
handler(governanceEvent);
127134
}
128-
catch
135+
catch (Exception ex)
129136
{
130-
// Swallow handler exceptions to maintain event bus stability.
131-
// In production, consider logging handler errors.
137+
HandlerError?.Invoke(ex, governanceEvent);
132138
}
133139
}
134140
}

packages/agent-governance-dotnet/src/AgentGovernance/GovernanceKernel.cs

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@ public sealed class GovernanceOptions
102102
/// }
103103
/// </code>
104104
/// </remarks>
105-
public sealed class GovernanceKernel
105+
public sealed class GovernanceKernel : IDisposable
106106
{
107107
/// <summary>
108108
/// The policy evaluation engine used by this kernel.
@@ -261,4 +261,14 @@ public void OnAllEvents(Action<GovernanceEvent> handler)
261261
{
262262
AuditEmitter.OnAll(handler);
263263
}
264+
265+
/// <inheritdoc />
266+
public void Dispose()
267+
{
268+
if (_disposed) return;
269+
_disposed = true;
270+
(Metrics as IDisposable)?.Dispose();
271+
}
272+
273+
private bool _disposed;
264274
}

packages/agent-governance-dotnet/src/AgentGovernance/Hypervisor/SagaOrchestrator.cs

Lines changed: 38 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,11 @@ public sealed class Saga
7575
public List<SagaStep> Steps { get; } = new();
7676
public List<string> FailedCompensations { get; } = new();
7777
public DateTime CreatedUtc { get; } = DateTime.UtcNow;
78+
79+
/// <summary>
80+
/// Per-saga lock for synchronizing state mutations during execution and compensation.
81+
/// </summary>
82+
internal object SyncRoot { get; } = new();
7883
}
7984

8085
/// <summary>
@@ -122,19 +127,19 @@ public void AddStep(Saga saga, SagaStep step)
122127
public async Task<bool> ExecuteAsync(Saga saga, CancellationToken cancellationToken = default)
123128
{
124129
ArgumentNullException.ThrowIfNull(saga);
125-
saga.State = SagaState.Executing;
130+
lock (saga.SyncRoot) { saga.State = SagaState.Executing; }
126131

127132
foreach (var step in saga.Steps)
128133
{
129-
var success = await ExecuteStepAsync(step, cancellationToken).ConfigureAwait(false);
134+
var success = await ExecuteStepAsync(saga, step, cancellationToken).ConfigureAwait(false);
130135
if (!success)
131136
{
132137
await CompensateAsync(saga, cancellationToken).ConfigureAwait(false);
133138
return false;
134139
}
135140
}
136141

137-
saga.State = SagaState.Committed;
142+
lock (saga.SyncRoot) { saga.State = SagaState.Committed; }
138143
return true;
139144
}
140145

@@ -149,58 +154,58 @@ public async Task<bool> ExecuteAsync(Saga saga, CancellationToken cancellationTo
149154
}
150155
}
151156

152-
private async Task<bool> ExecuteStepAsync(SagaStep step, CancellationToken cancellationToken)
157+
private async Task<bool> ExecuteStepAsync(Saga saga, SagaStep step, CancellationToken cancellationToken)
153158
{
154159
for (int attempt = 0; attempt < step.MaxRetries; attempt++)
155160
{
156-
step.State = StepState.Executing;
157-
step.Error = null;
161+
lock (saga.SyncRoot) { step.State = StepState.Executing; step.Error = null; }
158162

159163
try
160164
{
161165
using var cts = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken);
162166
cts.CancelAfter(step.Timeout);
163167

164-
step.Result = await step.Execute(cts.Token).ConfigureAwait(false);
165-
step.State = StepState.Committed;
168+
var result = await step.Execute(cts.Token).ConfigureAwait(false);
169+
lock (saga.SyncRoot) { step.Result = result; step.State = StepState.Committed; }
166170
return true;
167171
}
168172
catch (OperationCanceledException) when (!cancellationToken.IsCancellationRequested)
169173
{
170-
step.Error = $"Step '{step.ActionId}' timed out after {step.Timeout.TotalSeconds}s.";
174+
lock (saga.SyncRoot) { step.Error = $"Step '{step.ActionId}' timed out after {step.Timeout.TotalSeconds}s."; }
171175
}
172176
catch (Exception ex)
173177
{
174-
step.Error = $"Step '{step.ActionId}' failed: {ex.Message}";
178+
lock (saga.SyncRoot) { step.Error = $"Step '{step.ActionId}' failed: {ex.Message}"; }
175179
}
176180

177-
// Exponential backoff before retry.
178181
if (attempt + 1 < step.MaxRetries)
179182
{
180183
var delay = TimeSpan.FromSeconds(Math.Pow(2, attempt));
181184
await Task.Delay(delay, cancellationToken).ConfigureAwait(false);
182185
}
183186
}
184187

185-
step.State = StepState.Failed;
188+
lock (saga.SyncRoot) { step.State = StepState.Failed; }
186189
return false;
187190
}
188191

189192
private async Task CompensateAsync(Saga saga, CancellationToken cancellationToken)
190193
{
191-
saga.State = SagaState.Compensating;
192-
193-
// Compensate in reverse order of committed steps.
194-
var committedSteps = saga.Steps
195-
.Where(s => s.State == StepState.Committed)
196-
.Reverse()
197-
.ToList();
194+
List<SagaStep> committedSteps;
195+
lock (saga.SyncRoot)
196+
{
197+
saga.State = SagaState.Compensating;
198+
committedSteps = saga.Steps
199+
.Where(s => s.State == StepState.Committed)
200+
.Reverse()
201+
.ToList();
202+
}
198203

199204
foreach (var step in committedSteps)
200205
{
201206
if (step.Compensate is null)
202207
{
203-
step.State = StepState.Compensated;
208+
lock (saga.SyncRoot) { step.State = StepState.Compensated; }
204209
continue;
205210
}
206211

@@ -210,18 +215,24 @@ private async Task CompensateAsync(Saga saga, CancellationToken cancellationToke
210215
cts.CancelAfter(step.Timeout);
211216

212217
await step.Compensate(cts.Token).ConfigureAwait(false);
213-
step.State = StepState.Compensated;
218+
lock (saga.SyncRoot) { step.State = StepState.Compensated; }
214219
}
215220
catch (Exception ex)
216221
{
217-
step.State = StepState.CompensationFailed;
218-
step.Error = $"Compensation for '{step.ActionId}' failed: {ex.Message}";
219-
saga.FailedCompensations.Add(step.ActionId);
222+
lock (saga.SyncRoot)
223+
{
224+
step.State = StepState.CompensationFailed;
225+
step.Error = $"Compensation for '{step.ActionId}' failed: {ex.Message}";
226+
saga.FailedCompensations.Add(step.ActionId);
227+
}
220228
}
221229
}
222230

223-
saga.State = saga.FailedCompensations.Count > 0
224-
? SagaState.Escalated
225-
: SagaState.Aborted;
231+
lock (saga.SyncRoot)
232+
{
233+
saga.State = saga.FailedCompensations.Count > 0
234+
? SagaState.Escalated
235+
: SagaState.Aborted;
236+
}
226237
}
227238
}

packages/agent-governance-dotnet/src/AgentGovernance/Integration/GovernanceMiddleware.cs

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,8 @@ public sealed class GovernanceMiddleware
5858
{
5959
private readonly PolicyEngine _policyEngine;
6060
private readonly AuditEmitter _auditEmitter;
61+
private Dictionary<string, PolicyRule>? _ruleLookupCache;
62+
private int _cachedPolicyCount = -1;
6163
private readonly RateLimiter? _rateLimiter;
6264
private readonly GovernanceMetrics? _metrics;
6365
private readonly RingEnforcer? _ringEnforcer;
@@ -296,18 +298,28 @@ private static Dictionary<string, object> BuildContext(
296298
}
297299

298300
/// <summary>
299-
/// Searches loaded policies for a rule by name.
301+
/// Searches loaded policies for a rule by name using a cached lookup.
302+
/// Cache is invalidated when the policy count changes.
300303
/// </summary>
301304
private PolicyRule? FindMatchingRule(string ruleName)
302305
{
303-
foreach (var policy in _policyEngine.ListPolicies())
306+
var policies = _policyEngine.ListPolicies();
307+
var currentCount = policies.Count;
308+
309+
if (_ruleLookupCache is null || _cachedPolicyCount != currentCount)
304310
{
305-
foreach (var rule in policy.Rules)
311+
var cache = new Dictionary<string, PolicyRule>(StringComparer.Ordinal);
312+
foreach (var policy in policies)
306313
{
307-
if (string.Equals(rule.Name, ruleName, StringComparison.Ordinal))
308-
return rule;
314+
foreach (var rule in policy.Rules)
315+
{
316+
cache.TryAdd(rule.Name, rule);
317+
}
309318
}
319+
_ruleLookupCache = cache;
320+
_cachedPolicyCount = currentCount;
310321
}
311-
return null;
322+
323+
return _ruleLookupCache.TryGetValue(ruleName, out var cached) ? cached : null;
312324
}
313325
}

packages/agent-governance-dotnet/src/AgentGovernance/Trust/AgentIdentity.cs

Lines changed: 20 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -121,44 +121,48 @@ public byte[] Sign(string message)
121121
}
122122

123123
/// <summary>
124-
/// Verifies a signature against data using this identity's public key.
124+
/// Verifies a signature against data using this identity's key material.
125125
/// </summary>
126126
/// <param name="data">The data that was signed.</param>
127127
/// <param name="signature">The signature to verify.</param>
128128
/// <returns><c>true</c> if the signature is valid; otherwise <c>false</c>.</returns>
129-
/// <remarks>
130-
/// In the HMAC-SHA256 fallback, verification requires the private key to
131-
/// recompute the HMAC. This is a known limitation; with Ed25519, verification
132-
/// uses only the public key.
133-
/// </remarks>
129+
/// <exception cref="InvalidOperationException">
130+
/// Thrown when this identity does not have a private key. HMAC-SHA256
131+
/// verification requires the signing key. For public-key verification,
132+
/// migrate to Ed25519 on .NET 9+.
133+
/// </exception>
134134
public bool Verify(byte[] data, byte[] signature)
135135
{
136136
ArgumentNullException.ThrowIfNull(data);
137137
ArgumentNullException.ThrowIfNull(signature);
138138

139139
if (PrivateKey is null)
140140
{
141-
// Without Ed25519, we cannot verify with only the public key.
142-
// In production with .NET 9+, use Ed25519 public-key verification.
143-
return false;
141+
throw new InvalidOperationException(
142+
"Cannot verify signature: HMAC-SHA256 requires the private key. " +
143+
"For cross-agent verification with only a public key, migrate to Ed25519 (.NET 9+).");
144144
}
145145

146146
var expected = Sign(data);
147147
return CryptographicOperations.FixedTimeEquals(expected, signature);
148148
}
149149

150150
/// <summary>
151-
/// Verifies a signature using a standalone public key and private key pair.
151+
/// Verifies a signature using a standalone key pair.
152152
/// This static overload is provided for cross-agent verification scenarios.
153153
/// </summary>
154-
/// <param name="publicKey">The public key of the signer.</param>
154+
/// <param name="publicKey">The public key of the signer (unused in HMAC mode; reserved for Ed25519).</param>
155155
/// <param name="data">The signed data.</param>
156156
/// <param name="signature">The signature to verify.</param>
157157
/// <param name="privateKey">
158-
/// The private key for HMAC recomputation (required for HMAC-SHA256 fallback;
159-
/// not needed with Ed25519 on .NET 9+).
158+
/// The private key for HMAC recomputation. Required for HMAC-SHA256;
159+
/// will not be needed with Ed25519 on .NET 9+.
160160
/// </param>
161161
/// <returns><c>true</c> if the signature is valid; otherwise <c>false</c>.</returns>
162+
/// <exception cref="InvalidOperationException">
163+
/// Thrown when <paramref name="privateKey"/> is <c>null</c> because HMAC-SHA256
164+
/// cannot verify without the signing key.
165+
/// </exception>
162166
public static bool VerifySignature(byte[] publicKey, byte[] data, byte[] signature, byte[]? privateKey = null)
163167
{
164168
ArgumentNullException.ThrowIfNull(publicKey);
@@ -167,9 +171,9 @@ public static bool VerifySignature(byte[] publicKey, byte[] data, byte[] signatu
167171

168172
if (privateKey is null)
169173
{
170-
// Cannot verify without private key in HMAC-SHA256 mode.
171-
// With Ed25519 (.NET 9+), this would use the public key directly.
172-
return false;
174+
throw new InvalidOperationException(
175+
"Cannot verify signature: HMAC-SHA256 requires the private key. " +
176+
"For public-key-only verification, migrate to Ed25519 (.NET 9+).");
173177
}
174178

175179
using var hmac = new HMACSHA256(privateKey);

packages/agent-governance-dotnet/src/AgentGovernance/Trust/FileTrustStore.cs

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ public sealed class FileTrustStore : IDisposable
1616
private readonly object _ioLock = new();
1717
private readonly double _defaultScore;
1818
private readonly double _decayRate;
19+
private readonly Action<Exception, string>? _loadErrorHandler;
1920
private bool _disposed;
2021

2122
private static readonly JsonSerializerOptions JsonOptions = new()
@@ -34,12 +35,17 @@ public sealed class FileTrustStore : IDisposable
3435
/// Trust decay rate per hour of inactivity (0–1000). Defaults to 10.
3536
/// Agents that have not had positive signals lose trust over time.
3637
/// </param>
37-
public FileTrustStore(string filePath, double defaultScore = 500.0, double decayRate = 10.0)
38+
/// <param name="loadErrorHandler">
39+
/// Optional callback invoked when the trust store file is corrupted.
40+
/// Receives the exception and file path. When null, corruption is handled silently.
41+
/// </param>
42+
public FileTrustStore(string filePath, double defaultScore = 500.0, double decayRate = 10.0, Action<Exception, string>? loadErrorHandler = null)
3843
{
3944
ArgumentException.ThrowIfNullOrWhiteSpace(filePath);
4045
_filePath = filePath;
4146
_defaultScore = Math.Clamp(defaultScore, 0, 1000);
4247
_decayRate = Math.Max(0, decayRate);
48+
_loadErrorHandler = loadErrorHandler;
4349

4450
Load();
4551
}
@@ -179,9 +185,20 @@ private void Load()
179185
}
180186
}
181187
}
182-
catch (JsonException)
188+
catch (JsonException ex)
183189
{
184-
// Corrupted file — start fresh.
190+
// Preserve corrupted file for forensics.
191+
try
192+
{
193+
var corruptPath = _filePath + ".corrupt";
194+
File.Copy(_filePath, corruptPath, overwrite: true);
195+
}
196+
catch
197+
{
198+
// Best-effort backup; ignore if it fails.
199+
}
200+
201+
_loadErrorHandler?.Invoke(ex, _filePath);
185202
}
186203
}
187204
}

0 commit comments

Comments
 (0)