99
1010package org .elasticsearch .index .shard ;
1111
12+ import org .apache .logging .log4j .LogManager ;
13+ import org .apache .logging .log4j .Logger ;
1214import org .elasticsearch .TransportVersion ;
1315import org .elasticsearch .TransportVersions ;
1416import org .elasticsearch .common .io .stream .StreamInput ;
2931
3032public class IndexingStats implements Writeable , ToXContentFragment {
3133
34+ private static final Logger logger = LogManager .getLogger (IndexingStats .class );
35+
3236 public static class Stats implements Writeable , ToXContentFragment {
3337 private static final TransportVersion WRITE_LOAD_AVG_SUPPORTED_VERSION = TransportVersions .V_8_6_0 ;
3438
@@ -99,15 +103,10 @@ public Stats(
99103 this .noopUpdateCount = noopUpdateCount ;
100104 this .isThrottled = isThrottled ;
101105 this .throttleTimeInMillis = throttleTimeInMillis ;
102-
103- // We store the raw unweighted write load values in order to avoid losing precision when we combine the shard stats.
104- // N.B. In add(Stats) we sum both of these, so getWriteLoad() will return the ratio of the sums, which is a weighted average of
105- // the ratios we would get for each shard.
106+ // We store the raw unweighted write load values in order to avoid losing precision when we combine the shard stats
106107 this .totalIndexingTimeSinceShardStartedInNanos = totalIndexingTimeSinceShardStartedInNanos ;
107108 this .totalActiveTimeInNanos = totalActiveTimeInNanos ;
108-
109- // We store the exponentially weighted write load value as a double. N.B. In add(Stats) we add these, and getRecentWriteLoad()
110- // will return that sum.
109+ // We store the weighted write load as a double because the calculation is inherently floating point
111110 this .recentIndexingLoad = recentIndexingLoad ;
112111 }
113112
@@ -127,9 +126,34 @@ public void add(Stats stats) {
127126 if (isThrottled != stats .isThrottled ) {
128127 isThrottled = true ; // When combining if one is throttled set result to throttled.
129128 }
129+ // TODO(pete): Remove logging of sums
130+ long tmpNum = totalIndexingTimeSinceShardStartedInNanos ;
131+ long tmpDen = totalActiveTimeInNanos ;
132+ double tmpWgt = recentIndexingLoad ;
133+ // N.B. getWriteLoad() returns the ratio of these sums, which is the average of the ratios weighted by active time:
130134 totalIndexingTimeSinceShardStartedInNanos += stats .totalIndexingTimeSinceShardStartedInNanos ;
131135 totalActiveTimeInNanos += stats .totalActiveTimeInNanos ;
132- recentIndexingLoad += stats .recentIndexingLoad ;
136+ // We want getRecentWriteLoad() for the aggregated stats to also be the average weighted by active time, so we use the updating
137+ // formula for a weighted mean:
138+ if (totalActiveTimeInNanos > 0 ) {
139+ recentIndexingLoad += (stats .recentIndexingLoad - recentIndexingLoad ) * stats .totalActiveTimeInNanos
140+ / totalActiveTimeInNanos ;
141+ }
142+ logger .info (
143+ "***** SUM UNWEIGHTED ({} / {} = {}) + ({} / {} = {}) = ({} + {} = {}) --- WEIGHTED {} + {} = {}" ,
144+ tmpNum * 1.0e6 ,
145+ tmpDen * 1.0e6 ,
146+ 1.0 * tmpNum / tmpDen ,
147+ stats .totalIndexingTimeSinceShardStartedInNanos * 1.0e6 ,
148+ stats .totalActiveTimeInNanos * 1.0e6 ,
149+ 1.0 * stats .totalIndexingTimeSinceShardStartedInNanos / stats .totalActiveTimeInNanos ,
150+ totalIndexingTimeSinceShardStartedInNanos * 1.0e6 ,
151+ totalActiveTimeInNanos * 1.0e6 ,
152+ 1.0 * totalIndexingTimeSinceShardStartedInNanos / totalActiveTimeInNanos ,
153+ tmpWgt ,
154+ stats .recentIndexingLoad ,
155+ recentIndexingLoad
156+ );
133157 }
134158
135159 /**
@@ -212,9 +236,8 @@ public long getNoopUpdateCount() {
212236 * <p>If this {@link Stats} instance represents a single shard, this is the ratio of the sum of the time taken by every index operation
213237 * since the shard started to the elapsed time since the shard started.
214238 *
215- * <p>If this {@link Stats} instance represents multiple shards, this is the <b>average</b> that ratio for each shard, weighted by
216- * the elapsed time for each shard. N.B. This is a different behaviour to the {@link #getRecentWriteLoad()} method, which returns a
217- * sum over the shards.
239+ * <p>If this {@link Stats} instance represents multiple shards, this is the average of that ratio for each shard, weighted by
240+ * the elapsed time for each shard.
218241 */
219242 // TODO(pete): See which callers of this should be changed to use getRecentWriteLoad(). Make sure that they are single-shard!
220243 public double getWriteLoad () {
@@ -227,8 +250,8 @@ public double getWriteLoad() {
227250 * <p>If this {@link Stats} instance represents a single shard, this is an Exponentially Weighted Moving Rate based on the time
228251 * taken by indexing operations in this shard since the shard started.
229252 *
230- * <p>If this {@link Stats} instance represents multiple shards, this is the <b>sum</b> that rate for each shard. N.B. This is a
231- * different behaviour to the {@link #getWriteLoad()} method, which returns an average over the shards .
253+ * <p>If this {@link Stats} instance represents multiple shards, this is the average of that rate for each shard, weighted by
254+ * the elapsed time for each shard .
232255 */
233256 public double getRecentWriteLoad () {
234257 return recentIndexingLoad ;
0 commit comments