Skip to content

Commit c982ada

Browse files
cap ideal error rate, and fix tests
1 parent 5a59ee4 commit c982ada

File tree

2 files changed

+68
-44
lines changed

2 files changed

+68
-44
lines changed

lib/semian/pid_controller.rb

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -162,8 +162,11 @@ def calculate_ideal_error_rate
162162

163163
# Calculate p90 of error rates
164164
sorted = @error_rate_history.sort
165-
index = (sorted.size * 0.9).floor
166-
sorted[index] || sorted.last
165+
index = (sorted.size * 0.9).floor - 1
166+
p90_value = sorted[index] || sorted.last
167+
168+
# Cap at 10% to prevent bootstrapping issues
169+
[p90_value, 0.1].min
167170
end
168171

169172
def cleanup_old_data(current_time)

test/pid_controller_test.rb

Lines changed: 63 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ class TestPIDController < Minitest::Test
77
include TimeHelper
88

99
def setup
10+
Process.stubs(:clock_gettime).with(Process::CLOCK_MONOTONIC).returns(1000)
1011
@controller = Semian::PIDController.new(
1112
name: "test_controller",
1213
kp: 1.0,
@@ -192,17 +193,35 @@ def test_ideal_error_rate_calculation_p90
192193
history_duration: 100,
193194
)
194195

195-
# Simulate error rates over time: [0.1, 0.2, 0.3, ..., 1.0]
196-
10.times do |i|
197-
error_rate = i * 0.1
198-
controller.send(:store_error_rate, error_rate)
196+
5.times do
197+
controller.send(:store_error_rate, 0.05)
198+
end
199+
4.times do
200+
controller.send(:store_error_rate, 0.09)
201+
end
202+
controller.send(:store_error_rate, 0.99)
203+
204+
ideal_rate = controller.send(:calculate_ideal_error_rate)
205+
206+
assert_equal(0.09, ideal_rate)
207+
end
208+
209+
def test_ideal_error_rate_cap
210+
controller = Semian::PIDController.new(
211+
name: "test_cap",
212+
window_size: 1,
213+
history_duration: 100,
214+
)
215+
216+
# Simulate high error rates
217+
10.times do
218+
controller.send(:store_error_rate, 0.5) # 50% error rate
199219
end
200220

201-
# p90 of [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
202-
# Index 9 * 0.9 = 8.1, floor = 8, which is 0.9
203221
ideal_rate = controller.send(:calculate_ideal_error_rate)
204222

205-
assert_equal(0.9, ideal_rate)
223+
# Should be capped at 10%
224+
assert_equal(0.1, ideal_rate)
206225
end
207226

208227
def test_old_data_cleanup
@@ -229,24 +248,25 @@ def test_old_data_cleanup
229248
end
230249

231250
def test_data_preserved_within_window
232-
# Record data at different times within the window
251+
# Record data outside the window
233252
@controller.record_request(:error)
234253

235-
time_travel(2.0) do
254+
# Record data at different times within the window
255+
time_travel(4.0) do
236256
@controller.record_request(:success)
237-
end
238257

239-
time_travel(4.0) do
240-
@controller.record_request(:error)
258+
time_travel(2.0) do
259+
@controller.record_request(:error)
241260

242-
# All data should still be present (all within 5-second window)
243-
outcomes = @controller.instance_variable_get(:@request_outcomes)
261+
# All data should still be present (all within 5-second window)
262+
outcomes = @controller.instance_variable_get(:@request_outcomes)
244263

245-
assert_equal(3, outcomes.size)
264+
assert_equal(2, outcomes.size)
246265

247-
metrics = @controller.metrics
248-
# 2 errors, 1 success = 0.666... error rate
249-
assert_in_delta(0.666, metrics[:error_rate], 0.01)
266+
metrics = @controller.metrics
267+
# 1 error, 1 success = 0.5 error rate
268+
assert_in_delta(0.5, metrics[:error_rate], 0.01)
269+
end
250270
end
251271
end
252272

@@ -276,38 +296,39 @@ def test_pid_integration_behavior
276296

277297
time_travel(1.0) do
278298
@controller.update
279-
end
280-
initial_rejection = @controller.rejection_rate
281299

282-
# Errors start occurring
283-
time_travel(1.0) do
284-
10.times { @controller.record_request(:error) }
285-
3.times { @controller.record_ping(:failure) }
286-
end
300+
initial_rejection = @controller.rejection_rate
287301

288-
time_travel(1.0) do
289-
@controller.update
290-
end
302+
# Errors start occurring
303+
time_travel(1.0) do
304+
10.times { @controller.record_request(:error) }
305+
3.times { @controller.record_ping(:failure) }
291306

292-
# Rejection rate should increase
293-
mid_rejection = @controller.rejection_rate
307+
time_travel(1.0) do
308+
@controller.update
294309

295-
assert_operator(mid_rejection, :>, initial_rejection)
310+
# Rejection rate should increase
311+
mid_rejection = @controller.rejection_rate
296312

297-
# Now dependency recovers
298-
time_travel(1.0) do
299-
20.times { @controller.record_request(:success) }
300-
10.times { @controller.record_ping(:success) }
301-
end
313+
assert_operator(mid_rejection, :>, initial_rejection)
302314

303-
time_travel(1.0) do
304-
@controller.update
305-
end
315+
# Now dependency recovers
316+
time_travel(1.0) do
317+
20.times { @controller.record_request(:success) }
318+
10.times { @controller.record_ping(:success) }
306319

307-
# Rejection rate should decrease
308-
final_rejection = @controller.rejection_rate
320+
time_travel(1.0) do
321+
@controller.update
309322

310-
assert_operator(final_rejection, :<, mid_rejection)
323+
# Rejection rate should decrease
324+
final_rejection = @controller.rejection_rate
325+
326+
assert_operator(final_rejection, :<, mid_rejection)
327+
end
328+
end
329+
end
330+
end
331+
end
311332
end
312333
end
313334

0 commit comments

Comments
 (0)