Skip to content

Commit 82ae224

Browse files
authored
Merge pull request #257 from vmercierfr/aurora-support
Add support of RDS clusters (Aurora, Aurora Serverless and Multi-DB cluster)
2 parents aaf82d2 + 65e2747 commit 82ae224

38 files changed

+2738
-253
lines changed

.github/workflows/test.yaml

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
---
22
name: unittest
3-
on: # yamllint disable-line rule:truthy
3+
on: # yamllint disable-line rule:truthy
44
push:
55
branches:
66
- "*"
@@ -19,7 +19,7 @@ jobs:
1919
- uses: actions/checkout@v4
2020
- uses: actions/setup-go@v5
2121
with:
22-
go-version: '1.22'
22+
go-version: "1.22"
2323
- name: Install dependencies
2424
run: |
2525
go get .
@@ -38,13 +38,13 @@ jobs:
3838
hide_complexity: true
3939
indicators: true
4040
output: both
41-
thresholds: '60 80'
41+
thresholds: "60 80"
4242
- uses: jwalton/gh-find-current-pr@v1
4343
id: finder
4444
- name: Add Coverage PR Comment
4545
uses: marocchino/sticky-pull-request-comment@v2
4646
with:
47-
number: ${{ steps.finder.outputs.pr }}
47+
number: ${{ github.event.pull_request.number }}
4848
path: code-coverage-results.md
4949
recreate: true
5050

@@ -102,8 +102,8 @@ jobs:
102102

103103
checkcov:
104104
permissions:
105-
security-events: write # for github/codeql-action/upload-sarif to upload SARIF results
106-
actions: read # only required for a private repository by github/codeql-action/upload-sarif to get the Action run status
105+
security-events: write # for github/codeql-action/upload-sarif to upload SARIF results
106+
actions: read # only required for a private repository by github/codeql-action/upload-sarif to get the Action run status
107107

108108
runs-on: ubuntu-latest
109109
steps:

.yamllint.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,3 +10,4 @@ ignore:
1010
- dist/
1111
- prometheus-rds-exporter.yaml
1212
- configs/helm/templates/*.yaml # Helm templates are invalid due to Go template language
13+
- configs/grafana/vendor/ # Grafonnet dependencies to build dashboards

CONTRIBUTING.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,8 @@ We use:
4343

4444
* [`helm unittest`](https://github.com/helm-unittest/helm-unittest) for Helm chart tests
4545

46+
* [Jsonnet](https://jsonnet.org/) and [Grafonnet](https://github.com/grafana/grafonnet) to manage dashboards as code. See [Grafana dashboards README](configs/grafana/README.md)
47+
4648
## Pull Request Checklist
4749

4850
* Branch from the `main` branch and, if needed, rebase to the current main branch before submitting your pull request. If it doesn't merge cleanly with main you may be asked to rebase your changes.

README.md

Lines changed: 42 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,10 @@ It collects key metrics about:
4747
| rds_api_call_total | `api`, `aws_account_id`, `aws_region` | Number of calls to the AWS API |
4848
| rds_backup_retention_period_seconds | `aws_account_id`, `aws_region`, `dbidentifier` | Automatic DB snapshots retention period |
4949
| rds_ca_certificate_valid_until | `aws_account_id`, `aws_region`, `dbidentifier` | Timestamp of the expiration of the Instance certificate |
50+
| rds_cluster_info | `aws_account_id`, `aws_region`, `cluster_identifier`, `cluster_resource_id`, `engine`, `engine_version`, `arn` | RDS cluster information |
51+
| rds_cluster_acu_max_average | `aws_account_id`, `aws_region`, `cluster_identifier` | Maximum number of ACU |
52+
| rds_cluster_acu_min_average | `aws_account_id`, `aws_region`, `cluster_identifier` | Minimum number of ACU |
53+
| rds_cluster_info | `aws_account_id`, `aws_region`, `cluster_identifier`, `cluster_resource_id`, `engine`, `engine_version`, `arn` | RDS cluster information |
5054
| rds_cpu_usage_percent_average | `aws_account_id`, `aws_region`, `dbidentifier` | Instance CPU used |
5155
| rds_database_connections_average | `aws_account_id`, `aws_region`, `dbidentifier` | The number of client network connections to the database instance |
5256
| rds_dbload_average | `aws_account_id`, `aws_region`, `dbidentifier` | Number of active sessions for the DB engine |
@@ -59,7 +63,7 @@ It collects key metrics about:
5963
| rds_instance_age_seconds | `aws_account_id`, `aws_region`, `dbidentifier` | Time since instance creation |
6064
| rds_instance_baseline_iops_average | `aws_account_id`, `aws_region`, `instance_class` | Baseline IOPS of underlying EC2 instance class |
6165
| rds_instance_baseline_throughput_bytes | `aws_account_id`, `aws_region`, `instance_class` | Baseline throughput of underlying EC2 instance class |
62-
| rds_instance_info | `arn`, `aws_account_id`, `aws_region`, `dbi_resource_id`, `dbidentifier`, `deletion_protection`, `engine`, `engine_version`, `instance_class`, `multi_az`, `performance_insights_enabled`, `pending_maintenance`, `pending_modified_values`, `role`, `source_dbidentifier`, `storage_type`, `ca_certificate_identifier` | RDS instance information |
66+
| rds_instance_info | `arn`, `aws_account_id`, `aws_region`, `dbi_resource_id`, `dbidentifier`, `cluster_identifier`, `deletion_protection`, `engine`, `engine_version`, `instance_class`, `multi_az`, `performance_insights_enabled`, `pending_maintenance`, `pending_modified_values`, `role`, `source_dbidentifier`, `storage_type`, `ca_certificate_identifier` | RDS instance information |
6367
| rds_instance_log_files_size_bytes | `aws_account_id`, `aws_region`, `dbidentifier` | Total size of log files on the instance |
6468
| rds_instance_max_iops_average | `aws_account_id`, `aws_region`, `instance_class` | Maximum IOPS of underlying EC2 instance class |
6569
| rds_instance_max_throughput_bytes | `aws_account_id`, `aws_region`, `instance_class` | Maximum throughput of underlying EC2 instance class |
@@ -78,6 +82,7 @@ It collects key metrics about:
7882
| rds_read_throughput_bytes | `aws_account_id`, `aws_region`, `dbidentifier` | Average number of bytes read from disk per second |
7983
| rds_replica_lag_seconds | `aws_account_id`, `aws_region`, `dbidentifier` | For read replica configurations, the amount of time a read replica DB instance lags behind the source DB instance. Applies to MariaDB, Microsoft SQL Server, MySQL, Oracle, and PostgreSQL read replicas |
8084
| rds_replication_slot_disk_usage_bytes | `aws_account_id`, `aws_region`, `dbidentifier` | Disk space used by replication slot files. Applies to PostgreSQL |
85+
| rds_serverless_instance_acu_average | `aws_account_id`, `aws_region`, `dbidentifier` | Current ACU of the Aurora Serverless instance |
8186
| rds_swap_usage_bytes | `aws_account_id`, `aws_region`, `dbidentifier` | Amount of swap space used on the DB instance. This metric is not available for SQL Server |
8287
| rds_transaction_logs_disk_usage_bytes | `aws_account_id`, `aws_region`, `dbidentifier` | Disk space used by transaction logs (only on PostgreSQL) |
8388
| rds_usage_allocated_storage_bytes | `aws_account_id`, `aws_region` | Total storage used by AWS RDS instances |
@@ -192,6 +197,22 @@ RDS instance details</br>
192197
![RDS exporters](docs/screenshots/rds-exporter.png)
193198
Prometheus RDS exporter<br/>
194199
<a href="configs/grafana/public/prometheus-rds-exporter.json">JSON</a> or <a href="https://grafana.com/grafana/dashboards/19679/">19679</a>
200+
</td>
201+
</tr>
202+
203+
<tr>
204+
<td>
205+
206+
![RDS clusters](docs/screenshots/rds-clusters.png)
207+
RDS clusters<br/>
208+
<a href="configs/grafana/public/rds-clusters.json">JSON</a>
209+
</td>
210+
<td>
211+
212+
![RDS cluster details](docs/screenshots/rds-cluster.png)
213+
RDS cluster details<br/>
214+
<a href="configs/grafana/public/rds-cluster.json">JSON</a>
215+
195216
</td>
196217
</tr>
197218
</table>
@@ -200,25 +221,26 @@ Prometheus RDS exporter</br>
200221

201222
Configuration can be defined in [prometheus-rds-exporter.yaml](https://github.com/qonto/prometheus-rds-exporter/blob/main/configs/prometheus-rds-exporter/prometheus-rds-exporter.yaml) or in environment variables (format `PROMETHEUS_RDS_EXPORTER_<PARAMETER_NAME>`).
202223

203-
|Parameter | Description | Default |
204-
| ------------------------ | -------------------------------------------------------------------------------------------------------------------------- | ----------------------- |
205-
| aws-assume-role-arn | AWS IAM ARN role to assume to fetch metrics | |
206-
| aws-assume-role-session | AWS assume role session name | prometheus-rds-exporter |
207-
| collect-instance-metrics | Collect AWS instances metrics (AWS Cloudwatch API) | true |
208-
| collect-instance-tags | Collect AWS RDS tags | true |
209-
| collect-instance-types | Collect AWS instance types information (AWS EC2 API) | true |
210-
| collect-logs-size | Collect AWS instances logs size (AWS RDS API) | true |
211-
| collect-maintenances | Collect AWS instances maintenances (AWS RDS API) | true |
212-
| collect-quotas | Collect AWS RDS quotas (AWS quotas API) | true |
213-
| collect-usages | Collect AWS RDS usages (AWS Cloudwatch API) | true |
214-
| tag-selections | Tags to select database instances with. Refer to [dedicated section on tag configuration](#tag-configuration) | |
215-
| debug | Enable debug mode | |
216-
| enable-otel-traces | Enable OpenTelemetry traces. See [configuration](https://opentelemetry.io/docs/languages/sdk-configuration/otlp-exporter/) | false |
217-
| listen-address | Address to listen on for web interface | :9043 |
218-
| log-format | Log format (`text` or `json`) | json |
219-
| metrics-path | Path under which to expose metrics | /metrics |
220-
| tls-cert-path | Path to TLS certificate | |
221-
| tls-key-path | Path to private key for TLS | |
224+
|Parameter | Description | Default |
225+
| ---------------------------- | --------------------------------------------------------------------------------------------------------------------------------- | ----------------------- |
226+
| aws-assume-role-arn | AWS IAM ARN role to assume to fetch metrics | |
227+
| aws-assume-role-session | AWS assume role session name | prometheus-rds-exporter |
228+
| collect-instance-metrics | Collect AWS instances metrics (AWS Cloudwatch API) | true |
229+
| collect-instance-tags | Collect AWS RDS tags | true |
230+
| collect-instance-types | Collect AWS instance types information (AWS EC2 API) | true |
231+
| collect-logs-size | Collect AWS instances logs size, excluding serverless instances (AWS RDS API) | true |
232+
| collect-serverless-logs-size | Collect AWS instances logs size for serverless DB instances (AWS RDS API). Enabling this option prevents serverless DB instances from scaling down to zero | false |
233+
| collect-maintenances | Collect AWS instances maintenances (AWS RDS API) | true |
234+
| collect-quotas | Collect AWS RDS quotas (AWS quotas API) | true |
235+
| collect-usages | Collect AWS RDS usages (AWS Cloudwatch API) | true |
236+
| tag-selections | Tags to select database instances with. Refer to [dedicated section on tag configuration](#tag-configuration) | |
237+
| debug | Enable debug mode | |
238+
| enable-otel-traces | Enable OpenTelemetry traces. See [configuration](https://opentelemetry.io/docs/languages/sdk-configuration/otlp-exporter/) | false |
239+
| listen-address | Address to listen on for web interface | :9043 |
240+
| log-format | Log format (`text` or `json`) | json |
241+
| metrics-path | Path under which to expose metrics | /metrics |
242+
| tls-cert-path | Path to TLS certificate | |
243+
| tls-key-path | Path to private key for TLS | |
222244

223245
Configuration parameters priorities:
224246

cmd/root.go

Lines changed: 29 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -39,23 +39,24 @@ var (
3939
)
4040

4141
type exporterConfig struct {
42-
Debug bool `koanf:"debug"`
43-
LogFormat string `koanf:"log-format"`
44-
TLSCertPath string `koanf:"tls-cert-path"`
45-
TLSKeyPath string `koanf:"tls-key-path"`
46-
MetricPath string `koanf:"metrics-path"`
47-
ListenAddress string `koanf:"listen-address"`
48-
AWSAssumeRoleSession string `koanf:"aws-assume-role-session"`
49-
AWSAssumeRoleArn string `koanf:"aws-assume-role-arn"`
50-
CollectInstanceMetrics bool `koanf:"collect-instance-metrics"`
51-
CollectInstanceTags bool `koanf:"collect-instance-tags"`
52-
CollectInstanceTypes bool `koanf:"collect-instance-types"`
53-
CollectLogsSize bool `koanf:"collect-logs-size"`
54-
CollectMaintenances bool `koanf:"collect-maintenances"`
55-
CollectQuotas bool `koanf:"collect-quotas"`
56-
CollectUsages bool `koanf:"collect-usages"`
57-
OTELTracesEnabled bool `koanf:"enable-otel-traces"`
58-
TagSelections map[string][]string `koanf:"tag-selections"`
42+
Debug bool `koanf:"debug"`
43+
LogFormat string `koanf:"log-format"`
44+
TLSCertPath string `koanf:"tls-cert-path"`
45+
TLSKeyPath string `koanf:"tls-key-path"`
46+
MetricPath string `koanf:"metrics-path"`
47+
ListenAddress string `koanf:"listen-address"`
48+
AWSAssumeRoleSession string `koanf:"aws-assume-role-session"`
49+
AWSAssumeRoleArn string `koanf:"aws-assume-role-arn"`
50+
CollectInstanceMetrics bool `koanf:"collect-instance-metrics"`
51+
CollectInstanceTags bool `koanf:"collect-instance-tags"`
52+
CollectInstanceTypes bool `koanf:"collect-instance-types"`
53+
CollectLogsSize bool `koanf:"collect-logs-size"`
54+
CollectServerlessLogsSize bool `koanf:"collect-serverless-logs-size"`
55+
CollectMaintenances bool `koanf:"collect-maintenances"`
56+
CollectQuotas bool `koanf:"collect-quotas"`
57+
CollectUsages bool `koanf:"collect-usages"`
58+
OTELTracesEnabled bool `koanf:"enable-otel-traces"`
59+
TagSelections map[string][]string `koanf:"tag-selections"`
5960
}
6061

6162
func run(configuration exporterConfig) {
@@ -92,14 +93,15 @@ func run(configuration exporterConfig) {
9293
servicequotasClient := servicequotas.NewFromConfig(cfg)
9394

9495
collectorConfiguration := exporter.Configuration{
95-
CollectInstanceMetrics: configuration.CollectInstanceMetrics,
96-
CollectInstanceTypes: configuration.CollectInstanceTypes,
97-
CollectInstanceTags: configuration.CollectInstanceTags,
98-
CollectLogsSize: configuration.CollectLogsSize,
99-
CollectMaintenances: configuration.CollectMaintenances,
100-
CollectQuotas: configuration.CollectQuotas,
101-
CollectUsages: configuration.CollectUsages,
102-
TagSelections: configuration.TagSelections,
96+
CollectInstanceMetrics: configuration.CollectInstanceMetrics,
97+
CollectInstanceTypes: configuration.CollectInstanceTypes,
98+
CollectInstanceTags: configuration.CollectInstanceTags,
99+
CollectLogsSize: configuration.CollectLogsSize,
100+
CollectServerlessLogsSize: configuration.CollectServerlessLogsSize,
101+
CollectMaintenances: configuration.CollectMaintenances,
102+
CollectQuotas: configuration.CollectQuotas,
103+
CollectUsages: configuration.CollectUsages,
104+
TagSelections: configuration.TagSelections,
103105
}
104106

105107
collector := exporter.NewCollector(*logger, collectorConfiguration, awsAccountID, awsRegion, rdsClient, ec2Client, cloudWatchClient, servicequotasClient, tagClient)
@@ -163,7 +165,8 @@ func NewRootCommand() (*cobra.Command, error) {
163165
cmd.Flags().BoolP("collect-instance-tags", "", true, "Collect AWS RDS tags")
164166
cmd.Flags().BoolP("collect-instance-types", "", true, "Collect AWS instance types")
165167
cmd.Flags().BoolP("collect-instance-metrics", "", true, "Collect AWS instance metrics")
166-
cmd.Flags().BoolP("collect-logs-size", "", true, "Collect AWS instances logs size")
168+
cmd.Flags().BoolP("collect-logs-size", "", true, "Collect AWS instances logs size for non serverless instances")
169+
cmd.Flags().BoolP("collect-serverless-logs-size", "", false, "Collect AWS instances logs size for serverless DB instances")
167170
cmd.Flags().BoolP("collect-maintenances", "", true, "Collect AWS instances maintenances")
168171
cmd.Flags().BoolP("collect-quotas", "", true, "Collect AWS RDS quotas")
169172
cmd.Flags().BoolP("collect-usages", "", true, "Collect AWS RDS usages")

0 commit comments

Comments
 (0)