Skip to content

Commit f476b3a

Browse files
pfrcks and Amol Agrawal authored
set mdsd limits (#1027)
* set mdsd limit using container memory limit --------- Co-authored-by: Amol Agrawal <amagraw@microsoft.com>
1 parent 9a96a82 commit f476b3a

File tree

8 files changed

+94
-13
lines changed

8 files changed

+94
-13
lines changed

build/common/installer/scripts/tomlparser-agent-config.rb

Lines changed: 16 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@
8383
# Checking to see if container is not prometheus sidecar.
8484
# CONTAINER_TYPE is populated only for prometheus sidecar container.
8585
@containerType = ENV["CONTAINER_TYPE"]
86+
@containerMemoryLimitInBytes = ENV["CONTAINER_MEMORY_LIMIT_IN_BYTES"]
8687

8788
@promFbitChunkSize = 0
8889
@promFbitBufferSize = 0
@@ -268,11 +269,12 @@ def populateSettingValuesFromConfigMap(parsedConfig)
268269
puts "Using config map value: require_ack_response = #{@requireAckResponse}"
269270
end
270271
end
271-
# ama-logs daemonset only settings
272-
if !@controllerType.nil? && !@controllerType.empty? && @controllerType.strip.casecmp(@daemonset) == 0 && @containerType.nil?
273-
# mdsd settings
274-
mdsd_config = parsedConfig[:agent_settings][:mdsd_config]
275-
if !mdsd_config.nil?
272+
273+
# mdsd settings
274+
mdsd_config = parsedConfig[:agent_settings][:mdsd_config]
275+
if !mdsd_config.nil?
276+
# ama-logs daemonset only settings
277+
if !@controllerType.nil? && !@controllerType.empty? && @controllerType.strip.casecmp(@daemonset) == 0 && @containerType.nil?
276278
mdsdMonitoringMaxEventRate = mdsd_config[:monitoring_max_event_rate]
277279
if is_valid_number?(mdsdMonitoringMaxEventRate)
278280
@mdsdMonitoringMaxEventRate = mdsdMonitoringMaxEventRate.to_i
@@ -288,13 +290,6 @@ def populateSettingValuesFromConfigMap(parsedConfig)
288290
@mdsdUploadFrequencyInSeconds = mdsdUploadFrequencyInSeconds.to_i
289291
puts "Using config map value: upload_frequency_seconds = #{@mdsdUploadFrequencyInSeconds}"
290292
end
291-
mdsdBackPressureThresholdInMB = mdsd_config[:backpressure_memory_threshold_in_mb]
292-
if is_valid_number?(mdsdBackPressureThresholdInMB) && mdsdBackPressureThresholdInMB.to_i > 100
293-
@mdsdBackPressureThresholdInMB = mdsdBackPressureThresholdInMB.to_i
294-
puts "Using config map value: backpressure_memory_threshold_in_mb = #{@mdsdBackPressureThresholdInMB}"
295-
else
296-
puts "Ignoring mdsd backpressure limit. Check input values for correctness."
297-
end
298293
mdsdCompressionLevel = mdsd_config[:compression_level]
299294
if is_number?(mdsdCompressionLevel) && mdsdCompressionLevel.to_i >= 0 && mdsdCompressionLevel.to_i < 10 # supported levels from 0 to 9
300295
@mdsdCompressionLevel = mdsdCompressionLevel.to_i
@@ -303,6 +298,14 @@ def populateSettingValuesFromConfigMap(parsedConfig)
303298
puts "Ignoring mdsd compression_level level since its not supported level. Check input values for correctness."
304299
end
305300
end
301+
302+
mdsdBackPressureThresholdInMB = mdsd_config[:backpressure_memory_threshold_in_mb]
303+
if is_valid_number?(mdsdBackPressureThresholdInMB) && is_valid_number?(@containerMemoryLimitInBytes) && mdsdBackPressureThresholdInMB.to_i < (@containerMemoryLimitInBytes.to_i / 1048576) && mdsdBackPressureThresholdInMB.to_i > 100
304+
@mdsdBackPressureThresholdInMB = mdsdBackPressureThresholdInMB.to_i
305+
puts "Using config map value: backpressure_memory_threshold_in_mb = #{@mdsdBackPressureThresholdInMB}"
306+
else
307+
puts "Ignoring mdsd backpressure limit. Check input values for correctness. Configmap value in mb: #{mdsdBackPressureThresholdInMB}, container limit in bytes: #{@containerMemoryLimitInBytes}"
308+
end
306309
end
307310

308311
prom_fbit_config = nil
@@ -443,7 +446,7 @@ def populateSettingValuesFromConfigMap(parsedConfig)
443446
end
444447

445448
if @mdsdBackPressureThresholdInMB > 0
446-
file.write("export MDSD_BACKPRESSURE_MONITOR_MEMORY_THRESHOLD_IN_MB=#{@mdsdBackPressureThresholdInMB}\n")
449+
file.write("export BACKPRESSURE_THRESHOLD_IN_MB=#{@mdsdBackPressureThresholdInMB}\n")
447450
end
448451

449452
if @mdsdCompressionLevel >= 0

build/common/installer/scripts/tomlparser-prom-agent-config.rb

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,18 @@
1616
@waittime_port_25226 = 45
1717
@waittime_port_25228 = 120
1818
@waittime_port_25229 = 45
19+
@containerMemoryLimitInBytes = ENV["CONTAINER_MEMORY_LIMIT_IN_BYTES"]
20+
@mdsdBackPressureThresholdInMB = 0
1921

2022
def is_number?(value)
2123
true if Integer(value) rescue false
2224
end
2325

26+
# check if it is a number and greater than 0
27+
def is_valid_number?(value)
28+
return !value.nil? && is_number?(value) && value.to_i > 0
29+
end
30+
2431
# check if it is a valid waittime
2532
def is_valid_waittime?(value, default)
2633
return !value.nil? && is_number?(value) && value.to_i >= default/2 && value.to_i <= 3*default
@@ -94,6 +101,18 @@ def populateSettingValuesFromConfigMap(parsedConfig)
94101
end
95102
end
96103

104+
# mdsd settings
105+
mdsd_config = parsedConfig[:agent_settings][:mdsd_config]
106+
if !mdsd_config.nil?
107+
mdsdBackPressureThresholdInMB = mdsd_config[:backpressure_memory_threshold_in_mb]
108+
if is_valid_number?(mdsdBackPressureThresholdInMB) && is_valid_number?(@containerMemoryLimitInBytes) && mdsdBackPressureThresholdInMB.to_i < (@containerMemoryLimitInBytes.to_i / 1048576) && mdsdBackPressureThresholdInMB.to_i > 100
109+
@mdsdBackPressureThresholdInMB = mdsdBackPressureThresholdInMB.to_i
110+
puts "Using config map value: backpressure_memory_threshold_in_mb = #{@mdsdBackPressureThresholdInMB}"
111+
else
112+
puts "Ignoring mdsd backpressure limit. Check input values for correctness. Configmap value in mb: #{mdsdBackPressureThresholdInMB}, container limit in bytes: #{@containerMemoryLimitInBytes}"
113+
end
114+
end
115+
97116
end
98117
rescue => errorStr
99118
puts "config::error:Exception while reading config settings for sidecar agent configuration setting - #{errorStr}, using defaults"
@@ -124,6 +143,11 @@ def populateSettingValuesFromConfigMap(parsedConfig)
124143
file.write("export WAITTIME_PORT_25226=#{@waittime_port_25226}\n")
125144
file.write("export WAITTIME_PORT_25228=#{@waittime_port_25228}\n")
126145
file.write("export WAITTIME_PORT_25229=#{@waittime_port_25229}\n")
146+
147+
if @mdsdBackPressureThresholdInMB > 0
148+
file.write("export BACKPRESSURE_THRESHOLD_IN_MB=#{@mdsdBackPressureThresholdInMB}\n")
149+
end
150+
127151
# Close file after writing all environment variables
128152
file.close
129153
else

charts/azuremonitor-containers/templates/ama-logs-daemonset-windows.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,11 @@ spec:
8585
valueFrom:
8686
fieldRef:
8787
fieldPath: spec.nodeName
88+
- name: CONTAINER_MEMORY_LIMIT_IN_BYTES
89+
valueFrom:
90+
resourceFieldRef:
91+
containerName: ama-logs-windows
92+
resource: limits.memory
8893
- name: NODE_IP
8994
valueFrom:
9095
fieldRef:

charts/azuremonitor-containers/templates/ama-logs-daemonset.yaml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,11 @@ spec:
8383
valueFrom:
8484
fieldRef:
8585
fieldPath: status.hostIP
86+
- name: CONTAINER_MEMORY_LIMIT_IN_BYTES
87+
valueFrom:
88+
resourceFieldRef:
89+
containerName: ama-logs
90+
resource: limits.memory
8691
{{- if not (empty .Values.Azure.Extension.Name) }}
8792
- name: ARC_K8S_EXTENSION_NAME
8893
value: {{ .Values.Azure.Extension.Name | quote }}
@@ -210,6 +215,11 @@ spec:
210215
valueFrom:
211216
fieldRef:
212217
fieldPath: status.hostIP
218+
- name: CONTAINER_MEMORY_LIMIT_IN_BYTES
219+
valueFrom:
220+
resourceFieldRef:
221+
containerName: ama-logs-prometheus
222+
resource: limits.memory
213223
- name: ISTEST
214224
value: {{ .Values.amalogs.ISTEST | quote }}
215225
- name: HOSTNAME

charts/azuremonitor-containers/templates/ama-logs-deployment.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,11 @@ spec:
5858
resourceFieldRef:
5959
containerName: ama-logs
6060
resource: limits.cpu
61+
- name: CONTAINER_MEMORY_LIMIT_IN_BYTES
62+
valueFrom:
63+
resourceFieldRef:
64+
containerName: ama-logs
65+
resource: limits.memory
6166
{{- if ne .Values.amalogs.env.clusterId "<your_cluster_id>" }}
6267
- name: AKS_RESOURCE_ID
6368
value: {{ .Values.amalogs.env.clusterId | quote }}

kubernetes/linux/main.sh

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -855,6 +855,28 @@ else
855855
fi
856856
source ~/.bashrc
857857

858+
# derive the mdsd backpressure threshold from the container memory limit only when neither the backpressure threshold nor the fbit tail buffer limit is provided through the configmap
859+
if [ -n "${BACKPRESSURE_THRESHOLD_IN_MB}" ]; then
860+
export MDSD_BACKPRESSURE_MONITOR_MEMORY_THRESHOLD_IN_MB=${BACKPRESSURE_THRESHOLD_IN_MB}
861+
echo "export MDSD_BACKPRESSURE_MONITOR_MEMORY_THRESHOLD_IN_MB=$MDSD_BACKPRESSURE_MONITOR_MEMORY_THRESHOLD_IN_MB" >> ~/.bashrc
862+
echo "Setting MDSD backpressure threshold from configmap: ${MDSD_BACKPRESSURE_MONITOR_MEMORY_THRESHOLD_IN_MB} MB"
863+
source ~/.bashrc
864+
elif [ -z "${FBIT_TAIL_MEM_BUF_LIMIT}" ]; then
865+
if [ -n "${CONTAINER_MEMORY_LIMIT_IN_BYTES}" ]; then
866+
echo "Container limit in bytes: ${CONTAINER_MEMORY_LIMIT_IN_BYTES}"
867+
limit_in_mebibytes=$((CONTAINER_MEMORY_LIMIT_IN_BYTES / 1048576))
868+
869+
export MDSD_BACKPRESSURE_MONITOR_MEMORY_THRESHOLD_IN_MB=$((limit_in_mebibytes * 50 / 100))
870+
echo "export MDSD_BACKPRESSURE_MONITOR_MEMORY_THRESHOLD_IN_MB=$MDSD_BACKPRESSURE_MONITOR_MEMORY_THRESHOLD_IN_MB" >> ~/.bashrc
871+
echo "Setting MDSD backpressure threshold as 50 percent of container limit: ${MDSD_BACKPRESSURE_MONITOR_MEMORY_THRESHOLD_IN_MB} MB"
872+
source ~/.bashrc
873+
else
874+
echo "Container limit not found. Not setting mdsd backpressure threshold"
875+
fi
876+
else
877+
echo "MDSD backpressure threshold not set since tail_mem_buf_limit_megabytes is used in configmap. Use backpressure_memory_threshold_in_mb in configmap to set it."
878+
fi
879+
858880
if [ "${CONTAINER_TYPE}" == "PrometheusSidecar" ]; then
859881
if [ "${MUTE_PROM_SIDECAR}" != "true" ]; then
860882
echo "starting mdsd with mdsd-port=26130, fluentport=26230 and influxport=26330 in sidecar container..."

source/plugins/go/src/telemetry.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,11 @@ func SendContainerLogPluginMetrics(telemetryPushIntervalProperty string) {
185185
telemetryDimensions["PromFbitBufferSize"] = os.Getenv("AZMON_SIDECAR_FBIT_BUFFER_SIZE")
186186
telemetryDimensions["PromFbitMemBufLimit"] = os.Getenv("AZMON_SIDECAR_FBIT_MEM_BUF_LIMIT")
187187

188+
mdsdBackPressureThresholdInMB := os.Getenv("MDSD_BACKPRESSURE_MONITOR_MEMORY_THRESHOLD_IN_MB")
189+
if mdsdBackPressureThresholdInMB != "" {
190+
telemetryDimensions["mdsdBackPressureThresholdInMB"] = mdsdBackPressureThresholdInMB
191+
}
192+
188193
SendEvent(eventNameCustomPrometheusSidecarHeartbeat, telemetryDimensions)
189194

190195
} else {

source/plugins/ruby/in_kube_nodes.rb

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ def initialize(is_unit_test_mode = nil, kubernetesApiClient = nil,
5050
@@rsPromMonitorPodsFieldSelectorLength = @env["TELEMETRY_RS_PROM_FIELD_SELECTOR_LENGTH"]
5151
@@collectAllKubeEvents = @env["AZMON_CLUSTER_COLLECT_ALL_KUBE_EVENTS"]
5252
@@osmNamespaceCount = @env["TELEMETRY_OSM_CONFIGURATION_NAMESPACES_COUNT"]
53+
@@mdsdBackPressureThresholdInMB = @env["MDSD_BACKPRESSURE_MONITOR_MEMORY_THRESHOLD_IN_MB"]
5354

5455
@ContainerNodeInventoryTag = "oneagent.containerInsights.CONTAINER_NODE_INVENTORY_BLOB"
5556
@insightsMetricsTag = "oneagent.containerInsights.INSIGHTS_METRICS_BLOB"
@@ -393,6 +394,12 @@ def parse_and_emit_records(nodeInventory, batchTime = Time.utc.iso8601)
393394
if (File.file?(@@osmConfigMountPath))
394395
properties["osmNamespaceCount"] = @@osmNamespaceCount
395396
end
397+
398+
# telemetry about mdsd backpressure limits for replicaset
399+
if (!@@mdsdBackPressureThresholdInMB.nil?) && (!@@mdsdBackPressureThresholdInMB.empty?)
400+
properties["mdsdBackPressureThresholdInMB"] = @@mdsdBackPressureThresholdInMB
401+
end
402+
396403
@applicationInsightsUtility.sendMetricTelemetry("NodeCoreCapacity", capacityInfo["cpu"], properties)
397404
telemetrySent = true
398405
rescue => errorStr

0 commit comments

Comments
 (0)