1111
1212import org .elasticsearch .cluster .node .DiscoveryNodeRole ;
1313import org .elasticsearch .common .settings .Settings ;
14+ import org .elasticsearch .common .util .FeatureFlag ;
1415import org .elasticsearch .node .NodeRoleSettings ;
1516
1617import java .io .IOException ;
@@ -37,6 +38,8 @@ public class MachineDependentHeap {
3738 protected static final long MAX_HEAP_SIZE = GB * 31 ; // 31GB
3839 protected static final long MIN_HEAP_SIZE = 1024 * 1024 * 128 ; // 128MB
3940
41+ private static final FeatureFlag NEW_ML_MEMORY_COMPUTATION_FEATURE_FLAG = new FeatureFlag ("new_ml_memory_computation" );
42+
4043 public MachineDependentHeap () {}
4144
4245 /**
@@ -76,12 +79,16 @@ protected int getHeapSizeMb(Settings nodeSettings, MachineNodeRole role, long av
7679 /*
7780 * Machine learning only node.
7881 *
79- * <p>Heap is computed as:
80- * <ul>
81- * <li>40% of total system memory when total system memory 16 gigabytes or less.</li>
82- * <li>40% of the first 16 gigabytes plus 10% of memory above that when total system memory is more than 16 gigabytes.</li>
83- * <li>The absolute maximum heap size is 31 gigabytes.</li>
84- * </ul>
82+ * The memory reserved for Java is computed as:
83+ * - 40% of total system memory when total system memory is 16 gigabytes or less.
84+ * - 40% of the first 16 gigabytes plus 10% of memory above that when total system memory is more than 16 gigabytes.
85+ * - The absolute maximum heap size is 31 gigabytes.
86+ *
87+ * This Java memory is divided as follows:
88+ * - 2/3 of the Java memory is reserved for the Java heap.
89+ * - 1/3 of the Java memory is reserved for the Java direct memory.
90+ *
91+ * The direct memory being half of the heap is set by the JvmErgonomics class.
8592 *
8693 * In all cases the result is rounded down to the next whole multiple of 4 megabytes.
8794 * The reason for doing this is that Java will round requested heap sizes to a multiple
@@ -95,13 +102,22 @@ protected int getHeapSizeMb(Settings nodeSettings, MachineNodeRole role, long av
95102 *
96103 * If this formula is changed then corresponding changes must be made to the {@code NativeMemoryCalculator} and
97104 * {@code MlAutoscalingDeciderServiceTests} classes in the ML plugin code. Failure to keep the logic synchronized
98- * could result in repeated autoscaling up and down.
105+ * could result in ML processes crashing with OOM errors or repeated autoscaling up and down.
99106 */
100107 case ML_ONLY -> {
101- if (availableMemory <= (GB * 16 )) {
102- yield mb ((long ) (availableMemory * .4 ), 4 );
108+ double heapFractionBelow16GB = 0.4 ;
109+ double heapFractionAbove16GB = 0.1 ;
110+ if (NEW_ML_MEMORY_COMPUTATION_FEATURE_FLAG .isEnabled ()) {
111+ heapFractionBelow16GB = 0.4 / (1.0 + JvmErgonomics .DIRECT_MEMORY_TO_HEAP_FACTOR );
112+ heapFractionAbove16GB = 0.1 / (1.0 + JvmErgonomics .DIRECT_MEMORY_TO_HEAP_FACTOR );
113+ }
114+ if (availableMemory <= GB * 16 ) {
115+ yield mb ((long ) (availableMemory * heapFractionBelow16GB ), 4 );
103116 } else {
104- yield mb ((long ) min ((GB * 16 ) * .4 + (availableMemory - GB * 16 ) * .1 , MAX_HEAP_SIZE ), 4 );
117+ yield mb (
118+ (long ) min (GB * 16 * heapFractionBelow16GB + (availableMemory - GB * 16 ) * heapFractionAbove16GB , MAX_HEAP_SIZE ),
119+ 4
120+ );
105121 }
106122 }
107123 /*
0 commit comments