Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,14 @@

package com.aws.greengrass.telemetry;

import com.aws.greengrass.telemetry.models.TelemetryUnit;
import com.fasterxml.jackson.annotation.JsonAnyGetter;
import com.fasterxml.jackson.annotation.JsonAnySetter;
import com.fasterxml.jackson.annotation.JsonProperty;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
import lombok.Setter;

import java.util.HashMap;
import java.util.Map;
Expand All @@ -27,19 +27,16 @@ public class AggregatedMetric {
// TODO: We do not need this to be a map. This map assumes that a metric can have multiple aggregation types and
// values, which is incorrect. This can just be replaced by a String (for aggregation type)
// and an Object (for value).
@Setter
private Map<String, Object> value = new HashMap<>();
@JsonProperty("U")
private TelemetryUnit unit;
private String unit;

@JsonAnyGetter
public Map<String, Object> getValue() {
return value;
}

public void setValue(Map<String, Object> value) {
this.value = value;
}

@JsonAnySetter
public void jsonAggregationValue(final String name, final Object value) {
this.value.put(name, value);
Expand Down
124 changes: 85 additions & 39 deletions src/main/java/com/aws/greengrass/telemetry/MetricsAggregator.java
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import com.aws.greengrass.telemetry.impl.TelemetryLoggerMessage;
import com.aws.greengrass.telemetry.impl.config.TelemetryConfig;
import com.aws.greengrass.util.Coerce;
import com.aws.greengrass.util.platforms.Platform;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;

Expand All @@ -28,6 +29,8 @@
import java.util.Set;
import java.util.stream.Stream;

import static com.aws.greengrass.telemetry.SystemMetricsEmitter.NAMESPACE;

public class MetricsAggregator {
public static final Logger logger = LogManager.getLogger(MetricsAggregator.class);
protected static final String AGGREGATE_METRICS_FILE = "AggregateMetrics";
Expand Down Expand Up @@ -84,13 +87,23 @@ protected void aggregateMetrics(long lastAgg, long currTimestamp) {
try (Stream<String> logs = Files.lines(path)) {
logs.forEach((log) -> {
try {
/* {"thread":"pool-3-thread-4","level":"TRACE","eventType":null,"message":"{\"NS\":

\"SystemMetrics\",\"N\":\"TotalNumberOfFDs\",\"U\":\"Count\",\"A\":\"Average\",\"V\"

:4583,\"TS\":1600127641506}","contexts":{},"loggerName":"Metrics-SystemMetrics",

"timestamp":1600127641506,"cause":null} */
/* {
"thread": "pool-3-thread-4",
"level": "TRACE",
"eventType": null,
"message": {
"NS": "SystemMetrics",
"N": "TotalNumberOfFDs",
"U": "Count",
"A": "Average",
"V": 4583,
"TS": 1600127641506
},
"contexts": {},
"loggerName": "Metrics-SystemMetrics",
"timestamp": 1600127641506,
"cause": null
} */
GreengrassLogMessage egLog = objectMapper.readValue(log,
GreengrassLogMessage.class);
Metric mdp = objectMapper.readValue(egLog.getMessage(), Metric.class);
Expand Down Expand Up @@ -154,7 +167,7 @@ private List<AggregatedMetric> doAggregation(Map<String, List<Metric>> map) {
value.put(aggregationType, aggregation);
AggregatedMetric m = AggregatedMetric.builder()
.name(metricName)
.unit(metrics.get(0).getUnit())
.unit(String.valueOf(metrics.get(0).getUnit()))
.value(value)
.build();
aggMetrics.add(m);
Expand Down Expand Up @@ -183,15 +196,23 @@ protected Map<Long, List<AggregatedNamespaceData>> getMetricsToPublish(long last
try (Stream<String> logs = Files.lines(path)) {
logs.forEach(log -> {
try {
/* {"thread":"main","level":"TRACE","eventType":null,

"message":"{\"TS\":1599617227533,\"NS\":\"SystemMetrics\",\"M\":[{\"N\":\"CpuUsage\",

\"V\":60.0,\"U\":\"Percent\"},{\"N\":\"TotalNumberOfFDs\",\"V\":6000.0,\"U\":\"Count\"},

{\"N\":\"SystemMemUsage\",\"V\":3000.0,\"U\":\"Megabytes\"}]}","contexts":{},"loggerName":

"Metrics-AggregateMetrics","timestamp":1599617227595,"cause":null} */
/* {
"thread": "pool-3-thread-4",
"level": "TRACE",
"eventType": null,
"message": {
"NS": "SystemMetrics",
"N": "TotalNumberOfFDs",
"U": "Count",
"A": "Average",
"V": 4583,
"TS": 1600127641506
},
"contexts": {},
"loggerName": "Metrics-SystemMetrics",
"timestamp": 1600127641506,
"cause": null
} */
GreengrassLogMessage egLog = objectMapper.readValue(log,
GreengrassLogMessage.class);
AggregatedNamespaceData am = objectMapper.readValue(egLog.getMessage(),
Expand All @@ -214,37 +235,62 @@ protected Map<Long, List<AggregatedNamespaceData>> getMetricsToPublish(long last
}

// If there are no metrics to be published, then we should return and not publish any telemetry messages.
if (aggUploadMetrics.isEmpty()) {
return aggUploadMetrics;
if (!aggUploadMetrics.isEmpty()) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we still be returning fast if aggUploadMetrics is empty?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Or is the thought here that we will always add the kernel and OS metrics

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, the latter. It is possible that we publish before any other metrics are even aggregated. In any case, we should at least transmit the OS and kernel metrics.

// Along with the aggregated data points, we need to collect an additional data point for each metric which
// is like the aggregation of aggregated data points.
// TODO: [P41214598] Get accumulated data points during aggregation and cache it to the disk.
aggUploadMetrics.computeIfPresent(currTimestamp, (k, v) -> {
v.addAll(getAggForThePublishInterval(aggUploadMetrics.get(currTimestamp), currTimestamp));
return v;
});

// TODO: [P41214636] Verify the aggregation type of v2 metrics. As of now, all the v1
// metrics have "Sum" aggregation type and so is the cloud validation.
// The following code changes any aggregation type of the metrics to "Sum" only in the final result to keep
// it compatible with v1 and UATs for now. However, metrics are still defined and aggregated based on their
// own aggregation type.
aggUploadMetrics.forEach((k, v) -> v.forEach(nsd -> nsd.getMetrics().forEach(m -> {
Map<String, Object> value = new HashMap<>();
m.getValue().values().forEach((val) -> {
value.put("Sum", val);
m.setValue(value);
});
})));
}

// Along with the aggregated data points, we need to collect an additional data point for each metric which is
// like the aggregation of aggregated data points.
// TODO: [P41214598] Get accumulated data points during aggregation and cache it to the disk.
// Add kernel and OS metrics
aggUploadMetrics.computeIfPresent(currTimestamp, (k, v) -> {
v.addAll(getAggForThePublishInterval(aggUploadMetrics.get(currTimestamp), currTimestamp));
v.add(AggregatedNamespaceData.builder()
.timestamp(currTimestamp)
.namespace(NAMESPACE)
.metrics(getKernelAndOSMetrics())
.build());
return v;
});

// TODO: [P41214636] Verify the aggregation type of v2 metrics. As of now, all the v1
// metrics have "Sum" aggregation type and so is the cloud validation.
// The following code changes any aggregation type of the metrics to "Sum" only in the final result to keep it
// compatible with v1 and UATs for now. However, metrics are still defined and aggregated based on their own
// aggregation type.
aggUploadMetrics.forEach((k, v) -> {
v.forEach(nsd -> {
nsd.getMetrics().forEach(m -> {
Map<String, Object> value = new HashMap<>();
m.getValue().values().forEach((val) -> {
value.put("Sum", val);
m.setValue(value);
});
});
});
});
try {
logger.atDebug().kv("metrics", new ObjectMapper().writeValueAsString(aggUploadMetrics))
.log("Preparing to upload metrics");
} catch (JsonProcessingException e) {
logger.atWarn().setCause(e).log("Could not convert aggregated metrics to json, continuing");
}
return aggUploadMetrics;
}

@SuppressWarnings("PMD.DoubleBraceInitialization")
protected List<AggregatedMetric> getKernelAndOSMetrics() {
List<AggregatedMetric> kernelAndOSMetrics = new ArrayList<>();
Platform.getInstance().getOSAndKernelMetrics().forEach((key, value) ->
kernelAndOSMetrics.add(AggregatedMetric.builder()
.name(key)
.unit(Coerce.toString(value))
.value(new HashMap<String, Object>() {{
put("Sum", 1.0);
}})
.build()));
return kernelAndOSMetrics;
}

/**
* This function takes a list of aggregated metrics and returns their aggregation in a list(Aggregation of
* aggregated metrics). This is published to the cloud along with the aggregated metric points
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -218,8 +218,8 @@ void schedulePeriodicPublishMetrics(boolean isReconfigured) {
return;
}

int periodicPublishMetricsIntervalSec = configuration.getPeriodicPublishMetricsIntervalSeconds();
int maxInitialDelay = periodicPublishMetricsIntervalSec;
// Publish metrics quicker
int maxInitialDelay = configuration.getPeriodicAggregateMetricsIntervalSeconds();
if (isReconfigured) {
Instant lastPeriodicPubTime = Instant.ofEpochMilli(Coerce.toLong(getPeriodicPublishTimeTopic()));
if (lastPeriodicPubTime.plusSeconds(configuration.getPeriodicPublishMetricsIntervalSeconds())
Expand All @@ -231,7 +231,7 @@ void schedulePeriodicPublishMetrics(boolean isReconfigured) {
// all the devices to publish metrics at the same time.
long initialDelay = RandomUtils.nextLong(0, maxInitialDelay + 1);
periodicPublishMetricsFuture = ses.scheduleWithFixedDelay(this::publishPeriodicMetrics, initialDelay,
periodicPublishMetricsIntervalSec, TimeUnit.SECONDS);
configuration.getPeriodicPublishMetricsIntervalSeconds(), TimeUnit.SECONDS);
}

/**
Expand Down
3 changes: 3 additions & 0 deletions src/main/java/com/aws/greengrass/util/platforms/Platform.java
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
import java.nio.file.attribute.UserPrincipalLookupService;
import java.util.Arrays;
import java.util.EnumSet;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.locks.Lock;

Expand Down Expand Up @@ -236,6 +237,8 @@ protected abstract FileSystemPermissionView getFileSystemPermissionView(FileSyst

public abstract String loaderFilename();

public abstract Map<String, Object> getOSAndKernelMetrics();

protected static class FileSystemPermissionView {
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import oshi.SystemInfo;
import oshi.software.os.OSProcess;
import oshi.software.os.OperatingSystem;
import oshi.software.os.OperatingSystem.OSVersionInfo;

import java.io.BufferedReader;
import java.io.IOException;
Expand All @@ -41,6 +42,8 @@
import java.nio.file.attribute.UserPrincipal;
import java.util.Arrays;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.locks.Lock;
Expand Down Expand Up @@ -83,8 +86,9 @@ public class UnixPlatform extends Platform {

private final SystemResourceController systemResourceController = new StubResourceController();
private final UnixRunWithGenerator runWithGenerator;

private final OperatingSystem oshiOs = new SystemInfo().getOperatingSystem();
private final SystemInfo oshiSystemInfo = new SystemInfo();
private final OperatingSystem oshiOs = oshiSystemInfo.getOperatingSystem();
private final OSVersionInfo oshiVersionInfo = oshiOs.getVersionInfo();

/**
* Construct a new instance.
Expand Down Expand Up @@ -696,6 +700,18 @@ public String loaderFilename() {
return "loader";
}

@SuppressWarnings("PMD.DoubleBraceInitialization")
@Override
public Map<String, Object> getOSAndKernelMetrics() {
return new HashMap<String, Object>() {{
put("OSName", oshiOs.getFamily());
put("OSVersion", oshiVersionInfo.getVersion());
put("KernelVersion", oshiVersionInfo.getBuildNumber());
put("CPUArchitecture", oshiSystemInfo.getHardware().getProcessor().getProcessorIdentifier()
.getMicroarchitecture());
}};
}

private enum IdOption {
User, Group
}
Expand Down
Loading
Loading