Skip to content

Commit 8e43fde

Browse files
author
Clément Le Provost
authored
Stateful host statuses (#157)
* Stateful host statuses The client now keeps an up/down status for each of its hosts, and avoids targeting down hosts, unless their status is considered obsolete, as per the time frame indicated by `hostStatusTimeout`. This way, if a host is down for any reason (DNS resolution, routing problem, server down…), it will be ignored for subsequent requests, until the status timeout has expired. * Increase host status timeout to 5 minutes [ci skip]
1 parent 0631e53 commit 8e43fde

File tree

7 files changed

+208
-10
lines changed

7 files changed

+208
-10
lines changed

Source/AbstractClient.swift

Lines changed: 81 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,10 +51,38 @@ import Foundation
5151
}
5252

5353

54+
/// The last known status of a given API host: an up/down flag plus the date at which it was recorded.
55+
///
56+
internal struct HostStatus {
57+
/// Whether the host is "up" (`true`) or "down" (`false`).
58+
/// "Up" means it answers normally, "down" means that it doesn't. This does not distinguish between the different
59+
/// kinds of retriable failures: it could be DNS resolution failure, no route to host, response timeout, or server
60+
/// error. A non-retriable failure (e.g. `400 Bad Request`) is not considered for the "down" state.
61+
///
62+
var up: Bool
63+
64+
/// When the status was last modified.
65+
/// This is normally the moment when the client receives the response (or error).
66+
///
67+
var lastModified: Date
68+
}
69+
70+
5471
/// An abstract API client.
5572
///
5673
/// + Warning: Not meant to be used directly. See `Client` or `PlacesClient` instead.
5774
///
75+
/// ## Stateful hosts
76+
///
77+
/// In order to avoid hitting timeouts at every request when one or more hosts are not working properly (whatever the
78+
/// reason: DNS failure, no route to host, server down...), the client maintains a **known status** for each host.
79+
/// That status can be either *up*, *down* or *unknown*. Initially, all hosts are in the *unknown* state. Then a given
80+
/// host's status is updated whenever a request to it returns a response or an error.
81+
///
82+
/// When a host is flagged as *down*, it will not be considered for subsequent requests. However, to avoid discarding
83+
/// hosts permanently, statuses are only remembered for a given timeframe, indicated by `hostStatusTimeout`. (You may
84+
/// adjust it as needed, although the default value `defaultHostStatusTimeout` should make sense for most applications.)
85+
///
5886
@objc public class AbstractClient : NSObject {
5987
// MARK: Properties
6088

@@ -125,6 +153,21 @@ import Foundation
125153
}
126154
}
127155

156+
/// The last known statuses of hosts.
157+
/// If a host is absent from this dictionary, it means its status is unknown.
158+
///
159+
/// + Note: Hosts are never removed from this dictionary, which is a potential memory leak in theory, but does not
160+
/// matter in practice, because the host arrays are (1) provided at init time and seldom updated and (2) very
161+
/// short anyway.
162+
///
163+
internal var hostStatuses: [String: HostStatus] = [:]
164+
165+
/// The time (in seconds) after which a remembered host status is considered obsolete and ignored.
166+
@objc public var hostStatusTimeout: TimeInterval = defaultHostStatusTimeout
167+
168+
/// GCD queue to synchronize access to `hostStatuses`.
169+
internal var hostStatusQueue = DispatchQueue(label: "AbstractClient.hostStatusQueue")
170+
128171
// NOTE: Not constant only for the sake of mocking during unit tests.
129172
var session: URLSession
130173

@@ -137,6 +180,11 @@ import Foundation
137180
/// Dispatch queue used to run completion handlers.
138181
internal var completionQueue = DispatchQueue.main
139182

183+
// MARK: Constant
184+
185+
/// The default timeout for host statuses.
186+
@objc public static let defaultHostStatusTimeout: TimeInterval = 5 * 60
187+
140188
// MARK: Initialization
141189

142190
internal init(appID: String?, apiKey: String?, readHosts: [String], writeHosts: [String]) {
@@ -241,7 +289,39 @@ import Foundation
241289
/// Create a request with this client's settings.
242290
func newRequest(method: HTTPMethod, path: String, body: JSONObject?, hostnames: [String], isSearchQuery: Bool = false, completion: CompletionHandler? = nil) -> Request {
243291
let currentTimeout = isSearchQuery ? searchTimeout : timeout
244-
let request = Request(session: session, method: method, hosts: hostnames, firstHostIndex: 0, path: path, headers: headers, jsonBody: body, timeout: currentTimeout, completion: completion)
292+
let request = Request(client: self, method: method, hosts: hostnames, firstHostIndex: 0, path: path, headers: headers, jsonBody: body, timeout: currentTimeout, completion: completion)
245293
return request
246294
}
295+
296+
/// Filter a list of hosts according to the currently known statuses, keeping those that are up, those whose status
/// is unknown, and those whose recorded status is at least `hostStatusTimeout` old (i.e. obsolete).
297+
///
298+
/// - parameter hosts: The list of hosts to filter. Must not be empty (checked by an assertion).
299+
/// - returns: A filtered list of hosts, or the original list if the result of filtering would be empty.
300+
///
301+
func upOrUnknownHosts(_ hosts: [String]) -> [String] {
302+
assert(!hosts.isEmpty)
303+
let now = Date() // single reference date, so every host is compared against the same instant
304+
let filteredHosts = hostStatusQueue.sync { // read `hostStatuses` on its dedicated queue
305+
return hosts.filter { (host) -> Bool in
306+
if let status = self.hostStatuses[host] { // known status
307+
return status.up || now.timeIntervalSince(status.lastModified) >= self.hostStatusTimeout // include if up or obsolete
308+
} else { // unknown status
309+
return true // always include
310+
}
311+
}
312+
}
313+
// Avoid returning an empty list: if every host is flagged as down, fall back to the unfiltered list.
314+
return filteredHosts.isEmpty ? hosts : filteredHosts
315+
}
316+
317+
/// Update the status for a given host.
/// The new status replaces any previous entry for that host and is stamped with the current date.
318+
///
319+
/// - parameter host: The name of the host to update.
320+
/// - parameter up: Whether the host is currently up (true) or down (false).
321+
///
322+
func updateHostStatus(host: String, up: Bool) {
323+
hostStatusQueue.sync { // write `hostStatuses` on its dedicated queue
324+
self.hostStatuses[host] = HostStatus(up: up, lastModified: Date())
325+
}
326+
}
247327
}

Source/Request.swift

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,8 @@ import Foundation
3030
/// calls into a high-level operation. This operation can be cancelled by the user.
3131
///
3232
internal class Request: AsyncOperationWithCompletion {
33-
let session: URLSession
33+
/// The client to which this request is related.
34+
let client: AbstractClient
3435

3536
/// Request method.
3637
let method: HTTPMethod
@@ -64,10 +65,10 @@ internal class Request: AsyncOperationWithCompletion {
6465

6566
// MARK: - Initialization
6667

67-
init(session: URLSession, method: HTTPMethod, hosts: [String], firstHostIndex: Int, path: String, headers: [String: String]?, jsonBody: JSONObject?, timeout: TimeInterval, completion: CompletionHandler?) {
68-
self.session = session
68+
init(client: AbstractClient, method: HTTPMethod, hosts: [String], firstHostIndex: Int, path: String, headers: [String: String]?, jsonBody: JSONObject?, timeout: TimeInterval, completion: CompletionHandler?) {
69+
self.client = client
6970
self.method = method
70-
self.hosts = hosts
71+
self.hosts = client.upOrUnknownHosts(hosts)
7172
assert(!hosts.isEmpty)
7273
self.firstHostIndex = firstHostIndex
7374
self.nextHostIndex = firstHostIndex
@@ -127,9 +128,10 @@ internal class Request: AsyncOperationWithCompletion {
127128
if _cancelled {
128129
return
129130
}
130-
let request = createRequest(nextHostIndex)
131+
let currentHostIndex = nextHostIndex
132+
let request = createRequest(currentHostIndex)
131133
nextHostIndex = (nextHostIndex + 1) % hosts.count
132-
task = session.dataTask(with: request) {
134+
task = client.session.dataTask(with: request) {
133135
(data: Data?, response: URLResponse?, error: Error?) in
134136
var json: JSONObject?
135137
var finalError: Error? = error
@@ -162,6 +164,10 @@ internal class Request: AsyncOperationWithCompletion {
162164
}
163165
assert(json != nil || finalError != nil)
164166

167+
// Update host status.
168+
let down = finalError != nil && finalError!.isTransient()
169+
self.client.updateHostStatus(host: self.hosts[currentHostIndex], up: !down)
170+
165171
// Success: call completion block.
166172
if finalError == nil {
167173
self.callCompletion(content: json, error: nil)

Tests/ClientTests.swift

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -405,4 +405,55 @@ class ClientTests: OnlineTestCase {
405405
let memorizedIndex = client.indices.object(forKey: indexName as NSString)
406406
XCTAssertNil(memorizedIndex)
407407
}
408+
409+
/// Test that the status of down hosts is correctly remembered.
/// The first read host is replaced by an `algolia.biz` host name; the first request is expected to hit the timeout
/// on it, and the following `maxIterations` requests are expected to complete without hitting it again.
410+
func testHostStatus() {
411+
let expectation = self.expectation(description: #function)
412+
413+
client.readHosts[0] = uniqueAlgoliaBizHost()
414+
let maxIterations = 10
415+
// NOTE(review): assumes `listIndexes` is bounded by `searchTimeout` rather than `timeout` -- confirm.
let requestTimeout = client.searchTimeout
416+
client.hostStatusTimeout = requestTimeout * (Double(maxIterations) * 2) // ensure that the status will be kept long enough
417+
418+
let startTime = Date()
419+
client.listIndexes(completionHandler: {
420+
(content, error) -> Void in
421+
if let error = error {
422+
XCTFail("\(error)")
423+
expectation.fulfill()
424+
return
425+
}
426+
// Check that the timeout has been hit (the request took at least one full timeout).
427+
let stopTime = Date()
428+
let duration = stopTime.timeIntervalSince(startTime)
429+
XCTAssert(duration >= requestTimeout)
430+
431+
// Check that the failing host's status has been remembered (a missing entry fails the test).
432+
guard let status = self.client.hostStatuses[self.client.readHosts[0]] else { XCTFail(); expectation.fulfill(); return }
433+
XCTAssertFalse(status.up)
434+
435+
// Check that further iterations do not hit the timeout.
// Requests are chained: each one is started from the completion handler of the previous one.
436+
func doTest(iteration: Int) {
437+
self.client.listIndexes(completionHandler: {
438+
(content, error) -> Void in
439+
if let error = error {
440+
XCTFail("\(error)")
441+
expectation.fulfill()
442+
return
443+
}
444+
if iteration + 1 < maxIterations {
445+
doTest(iteration: iteration + 1)
446+
} else {
447+
// Check that the total elapsed time since `startTime` (which includes the first, timed-out request)
// is below what it would be if every request had hit the timeout.
448+
let stopTime = Date()
449+
let duration = stopTime.timeIntervalSince(startTime)
450+
XCTAssert(duration < requestTimeout * Double(maxIterations + 1))
451+
expectation.fulfill()
452+
}
453+
})
454+
}
455+
doTest(iteration: 0)
456+
})
457+
self.waitForExpectations(timeout: expectationTimeout, handler: nil)
458+
}
408459
}

Tests/Helpers.swift

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,3 +42,11 @@ func safeIndexName(_ name: String) -> String {
4242
func average(values: [Double]) -> Double {
4343
return values.reduce(0, +) / Double(values.count)
4444
}
45+
46+
/// Generate a new host name in the `algolia.biz` domain.
47+
/// The DNS lookup for any host in the `algolia.biz` domain will time out.
48+
/// Generating a new host name every time avoids any system-level or network-level caching side effect.
/// The name embeds the current Unix time truncated to whole seconds, so two calls made within the same second
/// return the same name.
49+
///
50+
func uniqueAlgoliaBizHost() -> String {
51+
return "swift-\(UInt32(NSDate().timeIntervalSince1970)).algolia.biz"
52+
}

Tests/IndexTests.swift

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1039,9 +1039,7 @@ class IndexTests: OnlineTestCase {
10391039
func testDNSTimeout() {
10401040
let expectation = self.expectation(description: #function)
10411041

1042-
// The DNS lookup for any host in the `algolia.biz` domain will time-out.
1043-
// We generate a new host name every time to avoid any cache effect.
1044-
client.readHosts[0] = "swift-\(UInt32(NSDate().timeIntervalSince1970)).algolia.biz"
1042+
client.readHosts[0] = uniqueAlgoliaBizHost()
10451043

10461044
client.listIndexes(completionHandler: {
10471045
(content, error) -> Void in

Tests/NetworkTests.swift

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -202,4 +202,58 @@ class NetworkTests: XCTestCase {
202202
}
203203
self.waitForExpectations(timeout: expectationTimeout, handler: nil)
204204
}
205+
206+
/// Test that the status of down hosts is correctly remembered.
/// Uses mocked responses: the 1st host fails, is then skipped while its "down" status is still fresh, and is
/// targeted again once `hostStatusTimeout` has elapsed.
207+
func testHostStatus() {
208+
let expectation = self.expectation(description: #function)
209+
210+
client.hostStatusTimeout = 3 // short timeout, so that the test can wait for the status to expire
211+
212+
// First host is down, second is up.
213+
session.responses["https://\(client.readHosts[0])/1/indexes"] = MockResponse(error: TIMEOUT_ERROR)
214+
session.responses["https://\(client.readHosts[1])/1/indexes"] = MockResponse(statusCode: 200, jsonBody: ["from": "2nd host"])
215+
216+
// Do a query: the 1st host will fail, and the client should transparently fall back to the 2nd host.
217+
client.listIndexes(completionHandler: {
218+
(content, error) -> Void in
219+
// Check that the response is successful and comes from the 2nd host.
220+
guard error == nil else { XCTFail(); expectation.fulfill(); return }
221+
XCTAssertEqual(content?["from"] as? String, "2nd host")
222+
223+
// Check that the failing host's status has been remembered.
224+
guard let status = self.client.hostStatuses[self.client.readHosts[0]] else { XCTFail(); expectation.fulfill(); return }
225+
XCTAssertFalse(status.up)
226+
let statusTimestamp = status.lastModified // kept to verify later that the status gets refreshed
227+
228+
// First host is up again.
229+
self.session.responses["https://\(self.client.readHosts[0])/1/indexes"] = MockResponse(statusCode: 200, jsonBody: ["from": "1st host"])
230+
231+
// Do the same query again: the client should ignore the 1st host and directly target the 2nd host.
232+
self.client.listIndexes(completionHandler: {
233+
(content, error) -> Void in
234+
// Check that the response is successful and still comes from the 2nd host.
235+
guard error == nil else { XCTFail(); expectation.fulfill(); return }
236+
XCTAssertEqual(content?["from"] as? String, "2nd host")
237+
238+
// Wait for the down host to be forgotten.
// NOTE(review): this blocks whichever queue runs this completion handler for the full `hostStatusTimeout`.
239+
Thread.sleep(forTimeInterval: self.client.hostStatusTimeout)
240+
241+
// Do the same query again: the client should target the 1st host again.
242+
self.client.listIndexes(completionHandler: {
243+
(content, error) -> Void in
244+
// Check that the response is successful and comes from the 1st host.
245+
guard error == nil else { XCTFail(); expectation.fulfill(); return }
246+
XCTAssertEqual(content?["from"] as? String, "1st host")
247+
248+
// Check that the host's status has been updated: now up, with a more recent timestamp.
249+
guard let status = self.client.hostStatuses[self.client.readHosts[0]] else { XCTFail(); expectation.fulfill(); return }
250+
XCTAssertTrue(status.up)
251+
XCTAssert(status.lastModified.compare(statusTimestamp) == .orderedDescending)
252+
253+
expectation.fulfill()
254+
})
255+
})
256+
})
257+
self.waitForExpectations(timeout: expectationTimeout, handler: nil)
258+
}
205259
}

Tests/ObjectiveCBridging.m

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,7 @@ - (void)testClient {
147147
client.readHosts = [client.readHosts arrayByAddingObject:@"nowhere.net"];
148148
client.writeHosts = [client.writeHosts arrayByAddingObject:@"nobody.com"];
149149
[client setHosts:@[ @"nowhere.net", @"nobody.com", @"never.org" ]];
150+
client.hostStatusTimeout = [Client defaultHostStatusTimeout];
150151

151152
// Operations
152153
// ----------

0 commit comments

Comments
 (0)