Skip to content

Commit 4fab2b0

Browse files
Use a single worker thread for LWCA key retrieval
Lightweight CA key retrieval is failing in an IPA test scenario where many sub-CAs are created in rapid succession. The test output suggests resource exhaustion (e.g. LDAP server connections). The current implementation spawns a separate, concurrent key retriever thread for each new LWCA. This behaviour is suspected to be the cause of the failure. Modify LWCA key retrieval to use a single retriever thread per replica. Retrievals are processed sequentially. The exponential backoff on failure is retained. The queue behaviour lives in the new `KeyRetrieverWorker` class, which uses `java.util.Timer` under the hood. `CAEngine` instantiates a single instance of `KeyRetrieverWorker`, which in turn starts the single timer thread and manages the queue. Fixes: https://issues.redhat.com/browse/RHEL-27181 Fixes: dogtagpki#4677
1 parent a74c8ad commit 4fab2b0

File tree

4 files changed

+106
-35
lines changed

4 files changed

+106
-35
lines changed

base/ca/src/main/java/com/netscape/ca/CertificateAuthority.java

Lines changed: 2 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -219,8 +219,6 @@ public class CertificateAuthority extends Subsystem implements IAuthority, IOCSP
219219
private ResponderID mResponderIDByName = null;
220220
private ResponderID mResponderIDByHash = null;
221221

222-
private Thread keyRetrieverThread;
223-
224222
/**
225223
* Internal constants
226224
*/
@@ -649,12 +647,7 @@ public synchronized void startKeyRetriever() throws EBaseException {
649647
return;
650648
}
651649

652-
if (keyRetrieverThread != null) {
653-
logger.info("CertificateAuthority: KeyRetriever already running for authority " + authorityID);
654-
return;
655-
}
656-
657-
logger.info("CertificateAuthority: Starting KeyRetriever for authority " + authorityID);
650+
logger.info("CertificateAuthority: Initialising KeyRetriever for authority " + authorityID);
658651

659652
CAEngine engine = CAEngine.getInstance();
660653
CAEngineConfig engineConfig = engine.getConfig();
@@ -688,13 +681,7 @@ public synchronized void startKeyRetriever() throws EBaseException {
688681
}
689682

690683
KeyRetrieverRunner runner = new KeyRetrieverRunner(keyRetriever, this);
691-
692-
keyRetrieverThread = new Thread(runner, "KeyRetriever-" + authorityID);
693-
keyRetrieverThread.start();
694-
}
695-
696-
public synchronized void removeKeyRetriever() {
697-
keyRetrieverThread = null;
684+
engine.getKeyRetrieverWorker().requestKeyRetrieval(runner);
698685
}
699686

700687
public void initSigningUnits() throws Exception {

base/ca/src/main/java/com/netscape/ca/KeyRetrieverRunner.java

Lines changed: 10 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@
3333
import com.netscape.certsrv.ca.CAMissingKeyException;
3434
import com.netscape.cmsutil.crypto.CryptoUtil;
3535

36-
public class KeyRetrieverRunner implements Runnable {
36+
public class KeyRetrieverRunner {
3737

3838
public final static org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(KeyRetrieverRunner.class);
3939

@@ -61,24 +61,10 @@ public KeyRetrieverRunner(KeyRetriever keyRetriever, CertificateAuthority certif
6161

6262
}
6363

64-
@Override
65-
public void run() {
66-
try {
67-
long d = 10000; // initial delay of 10 seconds
68-
69-
while (!_run()) {
70-
logger.debug("Retrying in " + d / 1000 + " seconds");
71-
try {
72-
Thread.sleep(d);
73-
} catch (InterruptedException e) {
74-
break;
75-
}
76-
d += d / 2; // back off
77-
}
78-
79-
} finally {
80-
ca.removeKeyRetriever();
81-
}
64+
/** Return the AuthorityID of the authority whose keys this runner
65+
* attempts to retrieve. */
66+
public AuthorityID getAuthorityID() {
67+
return aid;
8268
}
8369

8470
/**
@@ -89,8 +75,12 @@ public void run() {
8975
* does not necessarily imply that the process fully
9076
* completed. See comments at sites of 'return true;'
9177
* below.
78+
*
79+
* This method has package visibility to allow KeyRetrieverWorker
80+
* to access it. You do not need to call it directly when running
81+
* a KeyRetrieverRunner in its own thread.
9282
*/
93-
private boolean _run() {
83+
boolean attemptRetrieval() {
9484

9585
CAEngine engine = CAEngine.getInstance();
9686
CAEngineConfig cs = engine.getConfig();
Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
//
2+
// Copyright Red Hat, Inc.
3+
//
4+
// SPDX-License-Identifier: GPL-2.0-or-later
5+
//
6+
package com.netscape.ca;
7+
8+
import java.util.Collections;
9+
import java.util.SortedSet;
10+
import java.util.TreeSet;
11+
import java.util.Timer;
12+
import java.util.TimerTask;
13+
14+
import com.netscape.certsrv.ca.AuthorityID;
15+
16+
/**
17+
* Wrapper around a Timer to execute key retrieval attempts,
18+
* rescheduling failed attempts with backoff.
19+
*/
20+
public class KeyRetrieverWorker {
21+
22+
public final static org.slf4j.Logger logger =
23+
org.slf4j.LoggerFactory.getLogger(KeyRetrieverWorker.class);
24+
25+
private SortedSet<AuthorityID> aidsInQueue;
26+
27+
protected Timer timer;
28+
29+
/** Constructor initialises the queue and **starts the thread**.
30+
*
31+
* Only one KeyRetrieverWorker should ever be constructed.
32+
*/
33+
public KeyRetrieverWorker() {
34+
aidsInQueue = Collections.synchronizedSortedSet(new TreeSet());
35+
timer = new Timer("KeyRetrieverWorker");
36+
}
37+
38+
/** Register a key retriever with the worker thread.
39+
*
40+
* Following registration, no further action is required
41+
* by the caller. The worker thread takes care of retrieval
42+
* attempts and backoff.
43+
*/
44+
public void requestKeyRetrieval(KeyRetrieverRunner krr) {
45+
AuthorityID aid = krr.getAuthorityID();
46+
if (aidsInQueue.contains(aid)) {
47+
logger.info("KeyRetriever already enqueued for authority " + aid);
48+
return;
49+
}
50+
51+
logger.info("Queuing KeyRetriever for authority " + aid);
52+
aidsInQueue.add(aid);
53+
Request req = new Request(krr, 10000 /* initial backoff = 10s */);
54+
timer.schedule(req, 0); // attempt immediately
55+
}
56+
57+
private class Request extends TimerTask {
58+
KeyRetrieverRunner krr;
59+
long backoff_ms;
60+
61+
public Request(KeyRetrieverRunner krr, long backoff_ms) {
62+
this.krr = krr;
63+
this.backoff_ms = backoff_ms;
64+
}
65+
66+
public void run() {
67+
AuthorityID aid = krr.getAuthorityID();
68+
logger.debug(aid + ": attempt retrieval");
69+
if (krr.attemptRetrieval()) {
70+
logger.info(aid + ": successfully retrieved key");
71+
aidsInQueue.remove(aid);
72+
} else {
73+
logger.info(aid + ": failed to retrieve key; try again after "
74+
+ backoff_ms / 1000 + "s");
75+
long new_backoff = backoff_ms + backoff_ms / 2;
76+
// cannot reschedule "this" -> IllegalStateException;
77+
// therefore create a new Request and schedule that.
78+
timer.schedule(new Request(krr, new_backoff), backoff_ms);
79+
}
80+
}
81+
}
82+
83+
}

base/ca/src/main/java/org/dogtagpki/server/ca/CAEngine.java

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@
7878
import com.netscape.ca.CRLIssuingPoint;
7979
import com.netscape.ca.CRLIssuingPointConfig;
8080
import com.netscape.ca.CertificateAuthority;
81+
import com.netscape.ca.KeyRetrieverWorker;
8182
import com.netscape.certsrv.authentication.ISharedToken;
8283
import com.netscape.certsrv.base.BadRequestDataException;
8384
import com.netscape.certsrv.base.EBaseException;
@@ -208,6 +209,8 @@ public class CAEngine extends CMSEngine {
208209
protected AuthorityMonitor authorityMonitor;
209210
protected boolean enableAuthorityMonitor = true;
210211

212+
private KeyRetrieverWorker keyRetrieverWorker;
213+
211214
// is the current KRA-related info authoritative?
212215
private static boolean kraInfoAuthoritative = false;
213216

@@ -328,6 +331,10 @@ public boolean getEnableOCSP() {
328331
return enableOCSP;
329332
}
330333

334+
public KeyRetrieverWorker getKeyRetrieverWorker() {
335+
return keyRetrieverWorker;
336+
}
337+
331338
/**
332339
* Allows certificates to have validities that are longer
333340
* than this certificate authority's.
@@ -1087,6 +1094,10 @@ public void initSubsystems() throws Exception {
10871094
initCRLPublisher();
10881095
initPublisherProcessor();
10891096

1097+
// initialise key retriever worker (before
1098+
// initialising signing units)
1099+
keyRetrieverWorker = new KeyRetrieverWorker();
1100+
10901101
// initialize host CA signing units
10911102
hostCA.initSigningUnits();
10921103

0 commit comments

Comments
 (0)