Skip to content

Commit f67b718

Browse files
authored
SOLR-17203 fix calculation for CpuAllowedLimit (#2347)
1 parent a4fbad5 commit f67b718

File tree

4 files changed

+59
-46
lines changed

4 files changed

+59
-46
lines changed

solr/core/src/java/org/apache/solr/handler/RequestHandlerBase.java

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -242,15 +242,15 @@ public void handleRequest(SolrQueryRequest req, SolrQueryResponse rsp) {
242242
rsp.setHttpCaching(false);
243243
}
244244
} catch (Exception e) {
245-
e = normalizeReceivedException(req, e);
246-
processErrorMetricsOnException(e, metrics);
247-
rsp.setException(e);
245+
Exception normalized = normalizeReceivedException(req, e);
246+
processErrorMetricsOnException(normalized, metrics);
247+
rsp.setException(normalized);
248248
} finally {
249249
long elapsed = timer.stop();
250250
metrics.totalTime.inc(elapsed);
251251

252-
if (publishCpuTime) {
253-
Optional<Long> cpuTime = threadCpuTimer.getCpuTimeMs();
252+
if (publishCpuTime && threadCpuTimer != null) {
253+
Optional<Long> cpuTime = threadCpuTimer.getElapsedCpuMs();
254254
if (cpuTime.isPresent()) {
255255
// add CPU_TIME if not already added by SearchHandler
256256
NamedList<Object> header = rsp.getResponseHeader();

solr/core/src/java/org/apache/solr/search/CpuAllowedLimit.java

Lines changed: 17 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -17,25 +17,29 @@
1717
package org.apache.solr.search;
1818

1919
import com.google.common.annotations.VisibleForTesting;
20-
import java.lang.invoke.MethodHandles;
2120
import java.util.concurrent.TimeUnit;
21+
import net.jcip.annotations.NotThreadSafe;
2222
import org.apache.lucene.index.QueryTimeout;
2323
import org.apache.solr.common.params.CommonParams;
2424
import org.apache.solr.request.SolrQueryRequest;
2525
import org.apache.solr.request.SolrRequestInfo;
2626
import org.apache.solr.util.ThreadCpuTimer;
27-
import org.slf4j.Logger;
28-
import org.slf4j.LoggerFactory;
2927

3028
/**
3129
* Enforces a CPU-time based timeout on a given SolrQueryRequest, as specified by the {@code
3230
* cpuAllowed} query parameter.
31+
*
32+
* <p>Since this class uses {@link ThreadCpuTimer} it is irrevocably lock-hostile and can never be
33+
* exposed to multiple threads, even if guarded by synchronization. Normally this is attached to
34+
* objects ultimately held by a ThreadLocal in {@link SolrRequestInfo} to provide safe usage on the
35+
* assumption that such objects are not shared to other threads.
36+
*
37+
* @see ThreadCpuTimer
3338
*/
39+
@NotThreadSafe
3440
public class CpuAllowedLimit implements QueryTimeout {
35-
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
36-
37-
private final long limitAtNs;
3841
private final ThreadCpuTimer threadCpuTimer;
42+
private final long requestedTimeoutNs;
3943

4044
/**
4145
* Create an object to represent a CPU time limit for the current request. NOTE: this
@@ -49,6 +53,8 @@ public CpuAllowedLimit(SolrQueryRequest req) {
4953
throw new IllegalArgumentException("Thread CPU time monitoring is not available.");
5054
}
5155
SolrRequestInfo solrRequestInfo = SolrRequestInfo.getRequestInfo();
56+
// get existing timer if available to ensure sub-queries can't reset/exceed the intended time
57+
// constraint.
5258
threadCpuTimer =
5359
solrRequestInfo != null ? solrRequestInfo.getThreadCpuTimer() : new ThreadCpuTimer();
5460
long reqCpuLimit = req.getParams().getLong(CommonParams.CPU_ALLOWED, -1L);
@@ -57,28 +63,24 @@ public CpuAllowedLimit(SolrQueryRequest req) {
5763
throw new IllegalArgumentException(
5864
"Check for limit with hasCpuLimit(req) before creating a CpuAllowedLimit");
5965
}
60-
// calculate when the time limit is reached, account for the time already spent
61-
limitAtNs =
62-
threadCpuTimer.getStartCpuTimeNs()
63-
+ TimeUnit.NANOSECONDS.convert(reqCpuLimit, TimeUnit.MILLISECONDS);
66+
// calculate the time when the limit is reached, e.g. account for the time already spent
67+
requestedTimeoutNs = TimeUnit.NANOSECONDS.convert(reqCpuLimit, TimeUnit.MILLISECONDS);
6468
}
6569

6670
@VisibleForTesting
6771
CpuAllowedLimit(long limitMs) {
6872
this.threadCpuTimer = new ThreadCpuTimer();
69-
limitAtNs =
70-
threadCpuTimer.getCurrentCpuTimeNs()
71-
+ TimeUnit.NANOSECONDS.convert(limitMs, TimeUnit.MILLISECONDS);
73+
requestedTimeoutNs = TimeUnit.NANOSECONDS.convert(limitMs, TimeUnit.MILLISECONDS);
7274
}
7375

7476
/** Return true if the current request has a parameter with a valid value of the limit. */
7577
static boolean hasCpuLimit(SolrQueryRequest req) {
7678
return req.getParams().getLong(CommonParams.CPU_ALLOWED, -1L) > 0L;
7779
}
7880

79-
/** Return true if a max limit value is set and the current usage has exceeded the limit. */
81+
/** Return true if usage has exceeded the limit. */
8082
@Override
8183
public boolean shouldExit() {
82-
return limitAtNs - threadCpuTimer.getCurrentCpuTimeNs() < 0L;
84+
return threadCpuTimer.getElapsedCpuNs() > requestedTimeoutNs;
8385
}
8486
}

solr/core/src/java/org/apache/solr/util/ThreadCpuTimer.java

Lines changed: 30 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import java.lang.management.ThreadMXBean;
2222
import java.util.Optional;
2323
import java.util.concurrent.TimeUnit;
24+
import net.jcip.annotations.NotThreadSafe;
2425
import org.slf4j.Logger;
2526
import org.slf4j.LoggerFactory;
2627

@@ -29,8 +30,13 @@
2930
* {@link java.lang.management.ThreadMXBean}.
3031
*
3132
* <p>Calling code should create an instance of this class when starting the operation, and then can
32-
* get the {@link #getCpuTimeMs()} at any time thereafter.
33+
* get the {@link #getElapsedCpuMs()} at any time thereafter.
34+
*
35+
* <p>This class is irrevocably not thread safe. Never allow instances of this class to be exposed
36+
* to more than one thread. Acquiring an external lock will not be sufficient. This class can be
37+
* considered "lock-hostile" due to its caching of timing information for a specific thread.
3338
*/
39+
@NotThreadSafe
3440
public class ThreadCpuTimer {
3541
private static final long UNSUPPORTED = -1;
3642
public static final String CPU_TIME = "cpuTime";
@@ -56,61 +62,59 @@ public class ThreadCpuTimer {
5662
private final long startCpuTimeNanos;
5763

5864
/**
59-
* Create an instance to track the current thread's usage of CPU. The usage information can later
60-
* be retrieved by any thread by calling {@link #getCpuTimeMs()}.
65+
* Create an instance to track the current thread's usage of CPU. Usage information can later be
66+
* retrieved by calling {@link #getElapsedCpuMs()}. Timing starts immediately upon construction.
6167
*/
6268
public ThreadCpuTimer() {
63-
if (THREAD_MX_BEAN != null) {
64-
this.startCpuTimeNanos = THREAD_MX_BEAN.getCurrentThreadCpuTime();
65-
} else {
66-
this.startCpuTimeNanos = UNSUPPORTED;
67-
}
69+
this.startCpuTimeNanos = getThreadTotalCpuNs();
6870
}
6971

7072
public static boolean isSupported() {
7173
return THREAD_MX_BEAN != null;
7274
}
7375

7476
/**
75-
* Return the initial value of CPU time for this thread when this instance was first created.
76-
* NOTE: absolute value returned by this method has no meaning by itself, it should only be used
77-
* when comparing elapsed time between this value and {@link #getCurrentCpuTimeNs()}.
77+
* Return CPU time consumed by this thread since the construction of this timer object.
7878
*
7979
* @return current value, or {@link #UNSUPPORTED} if not supported.
8080
*/
81-
public long getStartCpuTimeNs() {
82-
return startCpuTimeNanos;
81+
public long getElapsedCpuNs() {
82+
return this.startCpuTimeNanos != UNSUPPORTED
83+
? getThreadTotalCpuNs() - this.startCpuTimeNanos
84+
: UNSUPPORTED;
8385
}
8486

8587
/**
86-
* Return current value of CPU time for this thread.
88+
* Get the cpu time for the current thread since {@link Thread#start()} without throwing an
89+
* exception.
8790
*
88-
* @return current value, or {@link #UNSUPPORTED} if not supported.
91+
* @see ThreadMXBean#getCurrentThreadCpuTime() for important details
92+
* @return the number of nanoseconds of cpu consumed by this thread since {@code Thread.start()}.
8993
*/
90-
public long getCurrentCpuTimeNs() {
94+
private long getThreadTotalCpuNs() {
9195
if (THREAD_MX_BEAN != null) {
92-
return this.startCpuTimeNanos != UNSUPPORTED
93-
? THREAD_MX_BEAN.getCurrentThreadCpuTime() - this.startCpuTimeNanos
94-
: UNSUPPORTED;
96+
return THREAD_MX_BEAN.getCurrentThreadCpuTime();
9597
} else {
9698
return UNSUPPORTED;
9799
}
98100
}
99101

100102
/**
101-
* Get the CPU usage information for the thread that created this {@link ThreadCpuTimer}. The
102-
* information will track the thread's cpu since the creation of this {@link ThreadCpuTimer}
103-
* instance, if the VM's cpu tracking is disabled, returned value will be {@link #UNSUPPORTED}.
103+
* Get the CPU usage information for the current thread since it created this {@link
104+
* ThreadCpuTimer}. The result is undefined if called by any other thread.
105+
*
106+
* @return the thread's cpu since the creation of this {@link ThreadCpuTimer} instance. If the
107+
* VM's cpu tracking is disabled, returned value will be {@link #UNSUPPORTED}.
104108
*/
105-
public Optional<Long> getCpuTimeMs() {
106-
long cpuTimeNs = getCurrentCpuTimeNs();
109+
public Optional<Long> getElapsedCpuMs() {
110+
long cpuTimeNs = getElapsedCpuNs();
107111
return cpuTimeNs != UNSUPPORTED
108112
? Optional.of(TimeUnit.MILLISECONDS.convert(cpuTimeNs, TimeUnit.NANOSECONDS))
109-
: Optional.of(UNSUPPORTED);
113+
: Optional.empty();
110114
}
111115

112116
@Override
113117
public String toString() {
114-
return getCpuTimeMs().toString();
118+
return getElapsedCpuMs().map(String::valueOf).orElse("UNSUPPORTED");
115119
}
116120
}

solr/core/src/test/org/apache/solr/search/TestCpuAllowedLimit.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -208,4 +208,11 @@ public void testDistribLimit() throws Exception {
208208
// System.err.println("rsp=" + rsp.jsonStr());
209209
assertNotNull("should have partial results", rsp.getHeader().get("partialResults"));
210210
}
211+
212+
@Test
213+
public void testDistribLimit2() throws Exception {
214+
// This looks silly, but it actually guards against:
215+
// https://issues.apache.org/jira/browse/SOLR-17203
216+
testDistribLimit();
217+
}
211218
}

0 commit comments

Comments
 (0)