Commit 8228a81

feat: add L003 report for integer overflow risks in sequence-generated PKs

Implement L003 report to monitor potential integer overflow in sequence-generated
primary keys (serial/identity columns). Based on the approach described by
Laurenz Albe at CYBERTEC:
https://www.cybertec-postgresql.com/en/integer-overflow-in-sequence-generated-primary-keys/

The report:
- Queries pgwatch_sequence_overflow_current_value metric
- Tracks both sequence and column data types (smallint/integer/bigint)
- Calculates percentage of max value used for each type
- Identifies high-risk columns exceeding configurable thresholds
- Sorts results by highest capacity usage

Slack thread: https://postgres-ai.slack.com/archives/CCKQPEM09/p1768427779650849
1 parent 6e9a1f1 commit 8228a81
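
As a quick illustration of the capacity math the report performs, a minimal
worked sketch: the max values mirror the TYPE_MAX_VALUES mapping in the diff
below, and the sequence position is a hypothetical example, not real data.

# Hypothetical worked example of the L003 percentage calculation.
TYPE_MAX_VALUES = {
    'smallint': 32767,
    'integer': 2147483647,
    'bigint': 9223372036854775807,
}

current_value = 1_073_741_824           # hypothetical sequence position
max_value = TYPE_MAX_VALUES['integer']  # serial/identity column of type integer
percent_used = current_value / max_value * 100
print(f"{percent_used:.2f}%")           # prints: 50.00%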

4 files changed: +313 -1 lines changed

reporter/postgres_reports.py

Lines changed: 124 additions & 1 deletion
@@ -4,7 +4,7 @@
 
 This script generates JSON reports containing Observations for specific PostgreSQL
 check types (A002, A003, A004, A007, D004, F001, F004, F005, H001, H002, H004,
-K001, K003, K004, K005, K006, K007, K008, M001, M002, M003, N001) by querying
+K001, K003, K004, K005, K006, K007, K008, L003, M001, M002, M003, N001) by querying
 Prometheus metrics using PromQL.
 
 IMPORTANT: Scope of this module
@@ -1013,6 +1013,128 @@ def generate_h004_redundant_indexes_report(self, cluster: str = "local", node_na
             postgres_version=self._get_postgres_version_info(cluster, node_name),
         )
 
+    def generate_l003_integer_overflow_report(
+            self, cluster: str = "local", node_name: str = "node-01",
+            warning_threshold_pct: float = 50.0, critical_threshold_pct: float = 75.0
+    ) -> Dict[str, Any]:
+        """
+        Generate L003 Integer Out-of-Range Risks report.
+
+        Monitors sequence-generated columns (serial/identity) for potential integer overflow
+        by checking how close the current sequence value is to the maximum value allowed
+        by the data type (smallint: 32767, integer: 2147483647, bigint: 9223372036854775807).
+
+        Based on the approach described by Laurenz Albe at CYBERTEC:
+        https://www.cybertec-postgresql.com/en/integer-overflow-in-sequence-generated-primary-keys/
+
+        Args:
+            cluster: Cluster name
+            node_name: Node name
+            warning_threshold_pct: Percentage threshold for warning (default 50%)
+            critical_threshold_pct: Percentage threshold for critical (default 75%)
+
+        Returns:
+            Dictionary containing sequence overflow risk observation data
+        """
+        logger.info("Generating L003 Integer Out-of-Range Risks report...")
+
+        # Data type max values (positive range only since sequences typically start at 1)
+        TYPE_MAX_VALUES = {
+            'smallint': 32767,
+            'integer': 2147483647,
+            'bigint': 9223372036854775807,
+        }
+
+        # Get all databases
+        databases = self.get_all_databases(cluster, node_name)
+
+        overflow_risks_by_db = {}
+        for db_name in databases:
+            # Query pgwatch metrics for sequence overflow data.
+            # The metric contains: sequence_name, schema_name, table_name, column_name,
+            # sequence_data_type, column_data_type, current_value
+            base_filter = f'cluster="{cluster}", node_name="{node_name}", datname="{db_name}"'
+
+            # Query primary metric - current sequence value
+            current_value_query = f'last_over_time(pgwatch_sequence_overflow_current_value{{{base_filter}}}[3h])'
+            current_value_result = self.query_instant(current_value_query)
+
+            # Build sequence data keyed by (schema_name, table_name, sequence_name)
+            sequences_data: Dict[tuple, Dict[str, Any]] = {}
+
+            if current_value_result.get('status') == 'success' and current_value_result.get('data', {}).get('result'):
+                for item in current_value_result['data']['result']:
+                    metric = item['metric']
+                    schema_name = metric.get('schema_name', 'public')
+                    table_name = metric.get('table_name', 'unknown')
+                    column_name = metric.get('column_name', 'unknown')
+                    sequence_name = metric.get('sequence_name', 'unknown')
+                    sequence_data_type = metric.get('sequence_data_type', 'integer').lower()
+                    column_data_type = metric.get('column_data_type', 'integer').lower()
+
+                    # Get current value from the metric value
+                    current_value = int(float(item['value'][1])) if item.get('value') else 0
+
+                    key = (schema_name, table_name, sequence_name)
+
+                    # Calculate max values based on data types
+                    sequence_max = TYPE_MAX_VALUES.get(sequence_data_type, TYPE_MAX_VALUES['integer'])
+                    column_max = TYPE_MAX_VALUES.get(column_data_type, TYPE_MAX_VALUES['integer'])
+
+                    # The effective max is the smaller of the two (sequence type vs column type)
+                    # since the column type is what ultimately stores the value
+                    effective_max = min(sequence_max, column_max)
+
+                    # Calculate percentages
+                    sequence_percent_used = (current_value / sequence_max * 100) if sequence_max > 0 else 0
+                    column_percent_used = (current_value / column_max * 100) if column_max > 0 else 0
+
+                    # Use the more restrictive (higher) percentage for display
+                    capacity_percent = max(sequence_percent_used, column_percent_used)
+
+                    sequences_data[key] = {
+                        "schema_name": schema_name,
+                        "table_name": table_name,
+                        "column_name": column_name,
+                        "sequence_name": sequence_name,
+                        "sequence_data_type": sequence_data_type,
+                        "column_data_type": column_data_type,
+                        "current_value": current_value,
+                        "max_value": effective_max,
+                        "sequence_percent_used": round(sequence_percent_used, 2),
+                        "column_percent_used": round(column_percent_used, 2),
+                        "capacity_used_pretty": f"{capacity_percent:.2f}%",
+                    }
+
+            # Convert to list and sort by capacity used (descending)
+            overflow_risks = list(sequences_data.values())
+            overflow_risks.sort(key=lambda x: max(x['sequence_percent_used'], x['column_percent_used']), reverse=True)
+
+            # Count high-risk entries (above warning threshold)
+            high_risk_count = sum(
+                1 for risk in overflow_risks
+                if max(risk['sequence_percent_used'], risk['column_percent_used']) >= warning_threshold_pct
+            )
+
+            # Skip databases with no sequence data
+            if not overflow_risks:
+                continue
+
+            overflow_risks_by_db[db_name] = {
+                "overflow_risks": overflow_risks,
+                "total_count": len(overflow_risks),
+                "high_risk_count": high_risk_count,
+                "warning_threshold_pct": warning_threshold_pct,
+                "critical_threshold_pct": critical_threshold_pct,
+            }
+
+        return self.format_report_data(
+            "L003",
+            overflow_risks_by_db,
+            node_name,
+            postgres_version=self._get_postgres_version_info(cluster, node_name),
+        )
+
     def generate_d004_pgstat_settings_report(self, cluster: str = "local", node_name: str = "node-01") -> Dict[
         str, Any]:
         """
@@ -4047,6 +4169,7 @@ def generate_all_reports(self, cluster: str = "local", node_name: str = None, co
             ('H001', self.generate_h001_invalid_indexes_report),
             ('H002', self.generate_h002_unused_indexes_report),
             ('H004', self.generate_h004_redundant_indexes_report),
+            ('L003', self.generate_l003_integer_overflow_report),
             ('K001', self.generate_k001_query_calls_report),
             ('K003', self.generate_k003_top_queries_report),
             ('K004', self.generate_k004_temp_bytes_report),
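
For orientation, a hedged usage sketch of the new method. The module path and
the constructor argument (a Prometheus base URL) are assumptions not confirmed
by this diff; the result traversal assumes the layout defined by
reporter/schemas/L003.schema.json below.

# Sketch only; PostgresReportGenerator's constructor signature is assumed.
from reporter.postgres_reports import PostgresReportGenerator

generator = PostgresReportGenerator("http://localhost:9090")  # assumed Prometheus URL
report = generator.generate_l003_integer_overflow_report(
    cluster="local",
    node_name="node-01",
    warning_threshold_pct=50.0,   # flag columns at >= 50% of type capacity
    critical_threshold_pct=75.0,  # escalate at >= 75%
)

# Walk the per-database entries (layout per the L003 schema)
for db_name, entry in report["results"]["node-01"]["data"].items():
    for risk in entry["overflow_risks"]:
        print(db_name, risk["sequence_name"], risk["capacity_used_pretty"])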

reporter/schemas/L003.schema.json

Lines changed: 153 additions & 0 deletions
@@ -0,0 +1,153 @@
+{
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "title": "L003 report schema",
+  "description": "Integer out-of-range risks in sequence-generated primary keys",
+  "type": "object",
+  "additionalProperties": false,
+  "required": ["checkId", "checkTitle", "timestamptz", "nodes", "results"],
+  "properties": {
+    "version": { "type": ["string", "null"] },
+    "build_ts": { "type": ["string", "null"] },
+    "generation_mode": { "type": ["string", "null"] },
+    "checkId": { "const": "L003" },
+    "checkTitle": { "const": "Integer out-of-range risks in PKs" },
+    "timestamptz": { "type": "string" },
+    "nodes": { "$ref": "#/$defs/nodes" },
+    "results": {
+      "type": "object",
+      "minProperties": 1,
+      "additionalProperties": { "$ref": "#/$defs/nodeResult" }
+    }
+  },
+  "$defs": {
+    "nodes": {
+      "type": "object",
+      "additionalProperties": false,
+      "required": ["primary", "standbys"],
+      "properties": {
+        "primary": { "type": "string" },
+        "standbys": { "type": "array", "items": { "type": "string" } }
+      }
+    },
+    "postgresVersion": {
+      "type": "object",
+      "additionalProperties": false,
+      "required": ["version", "server_version_num", "server_major_ver", "server_minor_ver"],
+      "properties": {
+        "version": { "type": "string" },
+        "server_version_num": { "type": "string" },
+        "server_major_ver": { "type": "string" },
+        "server_minor_ver": { "type": "string" }
+      }
+    },
+    "sequenceOverflowRisk": {
+      "type": "object",
+      "additionalProperties": false,
+      "required": [
+        "schema_name",
+        "table_name",
+        "column_name",
+        "sequence_name",
+        "sequence_data_type",
+        "column_data_type",
+        "current_value",
+        "max_value",
+        "sequence_percent_used",
+        "column_percent_used",
+        "capacity_used_pretty"
+      ],
+      "properties": {
+        "schema_name": {
+          "type": "string",
+          "description": "Schema containing the table"
+        },
+        "table_name": {
+          "type": "string",
+          "description": "Table name that owns the sequence"
+        },
+        "column_name": {
+          "type": "string",
+          "description": "Column name that uses the sequence (serial/identity)"
+        },
+        "sequence_name": {
+          "type": "string",
+          "description": "Name of the sequence"
+        },
+        "sequence_data_type": {
+          "type": "string",
+          "description": "Data type of the sequence (smallint, integer, bigint)"
+        },
+        "column_data_type": {
+          "type": "string",
+          "description": "Data type of the column that uses the sequence"
+        },
+        "current_value": {
+          "type": "integer",
+          "description": "Current value of the sequence (last used value)"
+        },
+        "max_value": {
+          "type": "integer",
+          "description": "Maximum value based on the more restrictive data type (sequence or column)"
+        },
+        "sequence_percent_used": {
+          "type": "number",
+          "description": "Percentage of sequence's data type range that has been used (0-100)"
+        },
+        "column_percent_used": {
+          "type": "number",
+          "description": "Percentage of column's data type range that has been used (0-100)"
+        },
+        "capacity_used_pretty": {
+          "type": "string",
+          "description": "Human-readable percentage with formatting (e.g., '45.23%')"
+        }
+      }
+    },
+    "dbEntry": {
+      "type": "object",
+      "additionalProperties": false,
+      "required": [
+        "overflow_risks",
+        "total_count",
+        "high_risk_count",
+        "warning_threshold_pct",
+        "critical_threshold_pct"
+      ],
+      "properties": {
+        "overflow_risks": {
+          "type": "array",
+          "items": { "$ref": "#/$defs/sequenceOverflowRisk" }
+        },
+        "total_count": {
+          "type": "integer",
+          "description": "Total number of sequence-generated columns found"
+        },
+        "high_risk_count": {
+          "type": "integer",
+          "description": "Number of columns exceeding the warning threshold"
+        },
+        "warning_threshold_pct": {
+          "type": "number",
+          "description": "Threshold percentage for warning level (default 50%)"
+        },
+        "critical_threshold_pct": {
+          "type": "number",
+          "description": "Threshold percentage for critical level (default 75%)"
+        }
+      }
+    },
+    "data": {
+      "type": "object",
+      "additionalProperties": { "$ref": "#/$defs/dbEntry" }
+    },
+    "nodeResult": {
+      "type": "object",
+      "additionalProperties": false,
+      "required": ["data"],
+      "properties": {
+        "data": { "$ref": "#/$defs/data" },
+        "postgres_version": { "$ref": "#/$defs/postgresVersion" }
+      }
+    }
+  }
+}
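
To sanity-check a payload against this schema outside the test suite, a minimal
sketch using the jsonschema package; the instance below is fabricated
illustrative data, and the schema path assumes the repo layout from this commit.

import json
import jsonschema  # pip install jsonschema

with open("reporter/schemas/L003.schema.json") as f:
    schema = json.load(f)

# Fabricated minimal instance, for illustration only.
instance = {
    "checkId": "L003",
    "checkTitle": "Integer out-of-range risks in PKs",
    "timestamptz": "2025-01-15T00:00:00Z",
    "nodes": {"primary": "node-01", "standbys": []},
    "results": {
        "node-01": {
            "data": {
                "app": {
                    "overflow_risks": [{
                        "schema_name": "public",
                        "table_name": "orders",
                        "column_name": "id",
                        "sequence_name": "orders_id_seq",
                        "sequence_data_type": "integer",
                        "column_data_type": "integer",
                        "current_value": 1073741824,
                        "max_value": 2147483647,
                        "sequence_percent_used": 50.0,
                        "column_percent_used": 50.0,
                        "capacity_used_pretty": "50.00%",
                    }],
                    "total_count": 1,
                    "high_risk_count": 1,
                    "warning_threshold_pct": 50.0,
                    "critical_threshold_pct": 75.0,
                }
            }
        }
    },
}

jsonschema.validate(instance=instance, schema=schema)  # raises ValidationError on mismatch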

tests/reporter/test_generators_unit.py

Lines changed: 2 additions & 0 deletions
@@ -902,6 +902,7 @@ def _(*args, **kwargs):
         "generate_h001_invalid_indexes_report",
         "generate_h002_unused_indexes_report",
         "generate_h004_redundant_indexes_report",
+        "generate_l003_integer_overflow_report",
         "generate_k001_query_calls_report",
         "generate_k003_top_queries_report",
         "generate_k004_temp_bytes_report",
@@ -937,6 +938,7 @@ def _(*args, **kwargs):
         'D004', 'F001', 'F004', 'F005', 'G001',
         'H001', 'H002', 'H004',
         'K001', 'K003', 'K004', 'K005', 'K006', 'K007', 'K008',
+        'L003',
         'M001', 'M002', 'M003',
         'N001',
         # S001 is not implemented yet

tests/reporter/test_report_schemas.py

Lines changed: 34 additions & 0 deletions
@@ -457,6 +457,40 @@ def test_schema_h004(
     validate_report(report)
 
 
+@pytest.mark.unit
+def test_schema_l003(
+    monkeypatch: pytest.MonkeyPatch,
+    generator: PostgresReportGenerator,
+    fixed_pg_version,
+    prom_result,
+) -> None:
+    monkeypatch.setattr(generator, "_get_postgres_version_info", lambda *args, **kwargs: fixed_pg_version)
+    monkeypatch.setattr(generator, "get_all_databases", lambda *args, **kwargs: ["app"])
+
+    responses = {
+        "pgwatch_sequence_overflow_current_value": prom_result(
+            [
+                {
+                    "metric": {
+                        "schema_name": "public",
+                        "table_name": "orders",
+                        "column_name": "id",
+                        "sequence_name": "orders_id_seq",
+                        "sequence_data_type": "integer",
+                        "column_data_type": "integer",
+                        "datname": "app",
+                    },
+                    "value": [0, "1073741824"],  # ~50% of integer max
+                }
+            ]
+        ),
+    }
+    monkeypatch.setattr(generator, "query_instant", _query_stub_factory(prom_result, responses))
+
+    report = generator.generate_l003_integer_overflow_report("local", "node-1")
+    validate_report(report)
+
+
 def _sample_query_metric_row() -> dict[str, Any]:
     # Must match _process_pgss_data() output keys for the current mapping used in _get_pgss_metrics_data_by_db().
     return {
