Skip to content

Commit 9c5bb2f

Browse files
authored
Implemented a fallback parameter validation for DbClusterParameterGroup (#230)
fixes aws-controllers-k8s/community#1847 `DbClusterParameterGroup` was incorrectly rejecting valid parameters like `slow_query_log`, `long_query_time`, and `log_queries_not_using_indexes` with "unknown parameter" errors. This occurred because the controller only validated parameters against `DescribeEngineDefaultClusterParameters` API, but some valid cluster parameters (particularly MySQL logging parameters) are only listed in the `DescribeEngineDefaultParameters` (instance-level) API response. Description of changes: Implemented a fallback parameter validation mechanism in `getParameterMeta()` that first checks cluster-level parameter defaults, and if not found, falls back to instance-level parameter defaults before rejecting a parameter as unknown. This allows the controller to properly validate and apply parameters that AWS RDS Console and CLI support but weren't previously accessible through the ACK controller, while maintaining backward compatibility and proper parameter metadata handling for apply methods and modifiability checks. ```yaml apiVersion: rds.services.k8s.aws/v1alpha1 kind: DBClusterParameterGroup metadata: name: test-slow-query-log spec: name: test-slow-query-log description: Test parameter group to reproduce slow_query_log issue family: "aurora-mysql8.0" parameterOverrides: slow_query_log: "1" long_query_time: "10" log_queries_not_using_indexes: "1" ``` ``` {"level":"debug","ts":"2025-07-21T14:41:49.089-0700","logger":"ackrt","msg":"patched resource status","kind":"DBClusterParameterGroup","namespace":"default","name":"test-slow-query-log","account":"xxx","role":"","region":"us-west-2","is_adopted":false,"generation":1,"json":"{\"metadata\":{\"resourceVersion\":\"103993920\"},\"spec\":{\"tags\":null},\"status\":{\"ackResourceMetadata\":{\"arn\":\"arn:aws:rds:us-west-2:xxxx:cluster-pg:test-slow-query-log\",\"ownerAccountID\":\"xxxx\",\"region\":\"us-west-2\"},\"conditions\":[{\"lastTransitionTime\":\"2025-07-21T21:41:49Z\",\"message\":\"Resource synced successfully\",\"reason\":\"\",\"status\":\"True\",\"type\":\"ACK.ResourceSynced\"}],\"parameterOverrideStatuses\":[{\"applyMethod\":\"immediate\",\"applyType\":\"dynamic\",\"parameterName\":\"log_queries_not_using_indexes\",\"parameterValue\":\"1\"},{\"applyMethod\":\"immediate\",\"applyType\":\"dynamic\",\"parameterName\":\"long_query_time\",\"parameterValue\":\"10\"},{\"applyMethod\":\"immediate\",\"applyType\":\"dynamic\",\"parameterName\":\"slow_query_log\",\"parameterValue\":\"1\"}]}}"} ``` By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license.
1 parent 1bf5026 commit 9c5bb2f

File tree

5 files changed

+274
-11
lines changed

5 files changed

+274
-11
lines changed

pkg/resource/db_cluster_parameter_group/hooks.go

Lines changed: 79 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -37,11 +37,17 @@ const (
3737
)
3838

3939
var (
40-
// cache of parameter defaults
40+
// cache of parameter defaults for cluster parameters
4141
cachedParamMeta = util.ParamMetaCache{
4242
Cache: map[string]map[string]util.ParamMeta{},
4343
}
4444

45+
// cache of parameter defaults for instance parameters
46+
// Used as fallback when cluster parameter validation fails
47+
instanceParamMeta = util.ParamMetaCache{
48+
Cache: map[string]map[string]util.ParamMeta{},
49+
}
50+
4551
errParameterGroupJustCreated = fmt.Errorf("parameter group just got created")
4652
requeueWaitWhileCreating = ackrequeue.NeededAfter(
4753
errParameterGroupJustCreated,
@@ -208,6 +214,11 @@ func sdkTagsFromResourceTags(
208214
// RDS does not have a DeleteParameter or DeleteParameterFromParameterGroup API
209215
// call. Instead, you need to call ResetDBClusterParameterGroup with a list of
210216
// DB Cluster Parameters that you want RDS to reset to a default value.
217+
//
218+
// Note(rushmash91): This function uses fallback parameter validation to work around an AWS API
219+
// limitation where DescribeEngineDefaultClusterParameters may not return all valid
220+
// cluster parameters (e.g., MySQL logging parameters like slow_query_log). See
221+
// getParameterMeta() for details on the fallback mechanism.
211222
func (rm *resourceManager) syncParameters(
212223
ctx context.Context,
213224
desired *resource,
@@ -307,9 +318,7 @@ func (rm *resourceManager) resetParameters(
307318
// default to this if something goes wrong looking up parameter
308319
// defaults
309320
applyMethod := svcsdktypes.ApplyMethodImmediate
310-
pMeta, err = cachedParamMeta.Get(
311-
ctx, *family, paramName, rm.getFamilyParameters,
312-
)
321+
pMeta, err = rm.getParameterMeta(ctx, *family, paramName)
313322
if err != nil {
314323
return err
315324
}
@@ -360,9 +369,7 @@ func (rm *resourceManager) modifyParameters(
360369
for paramName, paramValue := range toModify {
361370
// default to "immediate" if something goes wrong looking up defaults
362371
applyMethod := svcsdktypes.ApplyMethodImmediate
363-
pMeta, err = cachedParamMeta.Get(
364-
ctx, *family, paramName, rm.getFamilyParameters,
365-
)
372+
pMeta, err = rm.getParameterMeta(ctx, *family, paramName)
366373
if err != nil {
367374
return err
368375
}
@@ -433,3 +440,68 @@ func (rm *resourceManager) getFamilyParameters(
433440
}
434441
return familyMeta, nil
435442
}
443+
444+
// getInstanceFamilyParameters calls the RDS DescribeEngineDefaultParameters API to
445+
// retrieve the set of parameter information for a DB instance parameter group family.
446+
// This is used as a fallback when cluster parameter validation fails, as some valid
447+
// cluster parameters may only be listed in the instance parameter defaults.
448+
func (rm *resourceManager) getInstanceFamilyParameters(
449+
ctx context.Context,
450+
family string,
451+
) (map[string]util.ParamMeta, error) {
452+
var marker *string
453+
familyMeta := map[string]util.ParamMeta{}
454+
455+
for {
456+
resp, err := rm.sdkapi.DescribeEngineDefaultParameters(
457+
ctx,
458+
&svcsdk.DescribeEngineDefaultParametersInput{
459+
DBParameterGroupFamily: aws.String(family),
460+
Marker: marker,
461+
},
462+
)
463+
rm.metrics.RecordAPICall("GET", "DescribeEngineDefaultParameters", err)
464+
if err != nil {
465+
return nil, err
466+
}
467+
for _, param := range resp.EngineDefaults.Parameters {
468+
pName := *param.ParameterName
469+
familyMeta[pName] = util.ParamMeta{
470+
IsModifiable: *param.IsModifiable,
471+
IsDynamic: *param.ApplyType != applyTypeStatic,
472+
}
473+
}
474+
marker = resp.EngineDefaults.Marker
475+
if marker == nil {
476+
break
477+
}
478+
}
479+
return familyMeta, nil
480+
}
481+
482+
// getParameterMeta retrieves parameter metadata with fallback validation.
483+
// First tries cluster-level parameter validation, then falls back to instance-level
484+
// validation if the parameter is not found. This works around the AWS API limitation
485+
// where DescribeEngineDefaultClusterParameters may not return all valid cluster
486+
// parameters (e.g., MySQL logging parameters like slow_query_log).
487+
func (rm *resourceManager) getParameterMeta(
488+
ctx context.Context,
489+
family string,
490+
paramName string,
491+
) (*util.ParamMeta, error) {
492+
// Try cluster-level parameters first
493+
pMeta, err := cachedParamMeta.Get(ctx, family, paramName, rm.getFamilyParameters)
494+
if err == nil {
495+
return pMeta, nil
496+
}
497+
498+
// If not found in cluster parameters, try instance-level parameters
499+
// Some valid cluster parameters may only be listed in instance defaults
500+
instanceMeta, instanceErr := instanceParamMeta.Get(ctx, family, paramName, rm.getInstanceFamilyParameters)
501+
if instanceErr == nil {
502+
return instanceMeta, nil
503+
}
504+
505+
// Return original error if not found in either
506+
return nil, err
507+
}

test/e2e/db_cluster_parameter_group.py

Lines changed: 36 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -74,18 +74,50 @@ def get(db_cluster_parameter_group_name):
7474
return None
7575

7676
def get_parameters(db_cluster_parameter_group_name):
77-
"""Returns a dict containing the paramters of a given parameter group
77+
"""Returns a dict containing the parameters of a given parameter group
7878
79-
If no such DB cluster parameter group exists, returns None.
79+
If no such DB cluster parameter group exists, returns empty list.
8080
"""
8181
c = boto3.client('rds')
8282
try:
8383
resp = c.describe_db_cluster_parameters(
8484
DBClusterParameterGroupName=db_cluster_parameter_group_name,
8585
)
8686
return resp['Parameters']
87-
except c.exceptions.DBClusterParameterGroupNotFoundFault:
88-
return None
87+
except c.exceptions.DBParameterGroupNotFoundFault:
88+
return []
89+
90+
def get_user_defined_parameters(db_cluster_parameter_group_name):
91+
"""Returns a dict containing the user-defined parameters of a given cluster parameter group
92+
93+
If no such DB cluster parameter group exists, returns empty list.
94+
Uses Source="user" to get only user-defined parameters (like the controller does).
95+
"""
96+
c = boto3.client('rds')
97+
try:
98+
all_parameters = []
99+
marker = None
100+
101+
while True:
102+
params = {
103+
'DBClusterParameterGroupName': db_cluster_parameter_group_name,
104+
'Source': 'user'
105+
}
106+
if marker:
107+
params['Marker'] = marker
108+
109+
resp = c.describe_db_cluster_parameters(**params)
110+
all_parameters.extend(resp['Parameters'])
111+
112+
# Check if there are more results
113+
if 'Marker' in resp:
114+
marker = resp['Marker']
115+
else:
116+
break
117+
118+
return all_parameters
119+
except c.exceptions.DBParameterGroupNotFoundFault:
120+
return []
89121

90122
def get_tags(db_cluster_parameter_group_arn):
91123
"""Returns a dict containing the DB cluster parameter group's tag records

test/e2e/db_parameter_group.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,42 @@ def get_parameters(db_parameter_group_name):
8888
return None
8989

9090

91+
def get_engine_default_parameters(db_parameter_group_family):
92+
"""Returns a dict containing the engine default parameters for a given parameter group family
93+
94+
This function calls DescribeEngineDefaultParameters to get the default parameter metadata
95+
that's used as fallback validation in cluster parameter groups.
96+
"""
97+
c = boto3.client('rds')
98+
try:
99+
all_parameters = []
100+
marker = None
101+
102+
while True:
103+
if marker:
104+
resp = c.describe_engine_default_parameters(
105+
DBParameterGroupFamily=db_parameter_group_family,
106+
Marker=marker
107+
)
108+
else:
109+
resp = c.describe_engine_default_parameters(
110+
DBParameterGroupFamily=db_parameter_group_family,
111+
)
112+
113+
parameters = resp['EngineDefaults']['Parameters']
114+
all_parameters.extend(parameters)
115+
116+
# Check if there are more results
117+
if 'Marker' in resp['EngineDefaults']:
118+
marker = resp['EngineDefaults']['Marker']
119+
else:
120+
break
121+
122+
return all_parameters
123+
except Exception as e:
124+
return None
125+
126+
91127
def get_tags(db_parameter_group_arn):
92128
"""Returns a dict containing the DB parameter group's tag records from the
93129
RDS API.
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
apiVersion: rds.services.k8s.aws/v1alpha1
2+
kind: DBClusterParameterGroup
3+
metadata:
4+
name: $DB_CLUSTER_PARAMETER_GROUP_NAME
5+
spec:
6+
name: $DB_CLUSTER_PARAMETER_GROUP_NAME
7+
description: $DB_CLUSTER_PARAMETER_GROUP_DESC
8+
family: $DB_CLUSTER_PARAMETER_GROUP_FAMILY
9+
parameterOverrides:
10+
slow_query_log: "$PARAM_SLOW_QUERY_LOG_VALUE"
11+
long_query_time: "$PARAM_LONG_QUERY_TIME_VALUE"
12+
log_queries_not_using_indexes: "$PARAM_LOG_QUERIES_NOT_USING_INDEXES_VALUE"

test/e2e/tests/test_db_cluster_parameter_group.py

Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
from e2e import service_marker, CRD_GROUP, CRD_VERSION, load_rds_resource
2525
from e2e.replacement_values import REPLACEMENT_VALUES
2626
from e2e import db_cluster_parameter_group
27+
from e2e import db_parameter_group
2728
from e2e import tag
2829
from e2e import condition
2930

@@ -77,6 +78,49 @@ def aurora_mysql57_cluster_param_group():
7778
db_cluster_parameter_group.wait_until_deleted(resource_name)
7879

7980

81+
@pytest.fixture
82+
def aurora_mysql80_logging_cluster_param_group():
83+
resource_name = random_suffix_name("aurora-mysql8-logging", 32)
84+
85+
replacements = REPLACEMENT_VALUES.copy()
86+
replacements["DB_CLUSTER_PARAMETER_GROUP_NAME"] = resource_name
87+
replacements["DB_CLUSTER_PARAMETER_GROUP_DESC"] = "Test MySQL logging parameters for Aurora MySQL 8.0"
88+
replacements["DB_CLUSTER_PARAMETER_GROUP_FAMILY"] = "aurora-mysql8.0"
89+
replacements["PARAM_SLOW_QUERY_LOG_VALUE"] = "1"
90+
replacements["PARAM_LONG_QUERY_TIME_VALUE"] = "10"
91+
replacements["PARAM_LOG_QUERIES_NOT_USING_INDEXES_VALUE"] = "1"
92+
93+
resource_data = load_rds_resource(
94+
"db_cluster_parameter_group_aurora_mysql8.0_logging",
95+
additional_replacements=replacements,
96+
)
97+
logging.debug(resource_data)
98+
99+
# Create the k8s resource
100+
ref = k8s.CustomResourceReference(
101+
CRD_GROUP, CRD_VERSION, RESOURCE_PLURAL,
102+
resource_name, namespace="default",
103+
)
104+
k8s.create_custom_resource(ref, resource_data)
105+
cr = k8s.wait_resource_consumed_by_controller(ref)
106+
time.sleep(CREATE_WAIT_AFTER_SECONDS)
107+
108+
assert cr is not None
109+
assert k8s.get_resource_exists(ref)
110+
111+
yield ref, cr, resource_name
112+
113+
# Try to delete, if doesn't already exist
114+
try:
115+
_, deleted = k8s.delete_custom_resource(ref, 3, 10)
116+
assert deleted
117+
time.sleep(DELETE_WAIT_AFTER_SECONDS)
118+
except:
119+
pass
120+
121+
db_cluster_parameter_group.wait_until_deleted(resource_name)
122+
123+
80124
@service_marker
81125
@pytest.mark.canary
82126
class TestDBClusterParameterGroup:
@@ -161,3 +205,70 @@ def test_crud_aurora_mysql5_7(self, aurora_mysql57_cluster_param_group):
161205
assert "ParameterValue" in tp, f"No ParameterValue in parameter of name 'aurora_read_replica_read_committed': {tp}"
162206
assert tp["ParameterValue"] == "ON", f"Wrong value for parameter of name 'aurora_read_replica_read_committed': {tp}"
163207
assert found == 2, f"Did not find parameters with names 'aurora_binlog_read_buffer_size' and 'aurora_read_replica_read_committed': {test_params}"
208+
209+
def test_mysql_logging_parameters(self, aurora_mysql80_logging_cluster_param_group):
210+
ref, cr, resource_name = aurora_mysql80_logging_cluster_param_group
211+
212+
latest = db_cluster_parameter_group.get(resource_name)
213+
assert latest is not None
214+
215+
instance_defaults = db_parameter_group.get_engine_default_parameters("mysql8.0")
216+
assert instance_defaults is not None, "Failed to get instance-level engine defaults"
217+
218+
fallback_params = list(filter(lambda x: x["ParameterName"] in [
219+
"slow_query_log",
220+
"long_query_time",
221+
"log_queries_not_using_indexes",
222+
], instance_defaults))
223+
224+
# Log debug info about found parameters
225+
found_param_names = [p['ParameterName'] for p in fallback_params]
226+
logging.debug(f"Found fallback parameters: {found_param_names}")
227+
228+
assert len(fallback_params) == 3, f"Expected 3 MySQL logging parameters in instance defaults, found {len(fallback_params)}: {found_param_names}"
229+
230+
for param in fallback_params:
231+
assert "ParameterName" in param, f"Missing ParameterName in fallback parameter: {param}"
232+
assert "IsModifiable" in param, f"Missing IsModifiable in fallback parameter: {param}"
233+
assert "ApplyType" in param, f"Missing ApplyType in fallback parameter: {param}"
234+
235+
assert 'status' in cr
236+
assert 'parameterOverrideStatuses' in cr['status']
237+
238+
# Verify the parameter statuses show our MySQL logging parameters
239+
status_params = cr['status']['parameterOverrideStatuses']
240+
param_names = [p['parameterName'] for p in status_params]
241+
242+
assert "slow_query_log" in param_names, f"slow_query_log parameter missing from status: {param_names}"
243+
assert "long_query_time" in param_names, f"long_query_time parameter missing from status: {param_names}"
244+
assert "log_queries_not_using_indexes" in param_names, f"log_queries_not_using_indexes parameter missing from status: {param_names}"
245+
246+
# Additional wait for AWS RDS parameter propagation
247+
# RDS parameter changes can take 5-10 minutes to be fully visible via API
248+
logging.debug("Waiting additional time for AWS RDS parameter propagation...")
249+
time.sleep(MODIFY_WAIT_AFTER_SECONDS)
250+
251+
latest_params = db_cluster_parameter_group.get_user_defined_parameters(resource_name)
252+
assert latest_params is not None, "Failed to get user-defined cluster parameters"
253+
254+
test_params = list(filter(lambda x: x["ParameterName"] in [
255+
"slow_query_log",
256+
"long_query_time",
257+
"log_queries_not_using_indexes",
258+
], latest_params))
259+
260+
# Check initial parameter values
261+
expected_initial_values = {
262+
"slow_query_log": "1",
263+
"long_query_time": "10",
264+
"log_queries_not_using_indexes": "1"
265+
}
266+
267+
for tp in test_params:
268+
param_name = tp["ParameterName"]
269+
assert param_name in expected_initial_values, f"Unexpected parameter: {param_name}"
270+
assert tp["ParameterValue"] == expected_initial_values[param_name], \
271+
f"Wrong value for {param_name}: expected {expected_initial_values[param_name]}, got {tp['ParameterValue']}"
272+
273+
assert len(test_params) == len(expected_initial_values), \
274+
f"Expected {len(expected_initial_values)} parameters, found {len(test_params)}: {test_params}"

0 commit comments

Comments
 (0)