Skip to content

Commit 1ebbafd

Browse files
authored
Remove matching query plan hashes from metrics (#5)
* remove all query plan hashes that match the ones we're trying test for badness; unsure if this is reasonable to remove a plan hash for plans that dont match it, but first pass * add comment back * add comment back * remove new line * some refactoring * add a comment * save test work * add comments around tests * add comments around tests * reformat the if condition newline * remove unneeded import * leave the duped plan in the prior_plan list, but remove it specifically from the metrics * add type hints for Dict key value pairs * fix formatting on multiline return statement
1 parent f4d16e1 commit 1ebbafd

File tree

3 files changed

+229
-12
lines changed

3 files changed

+229
-12
lines changed

plan_monitor/detect.py

Lines changed: 32 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
import time
77
from datetime import datetime, timedelta, timezone
88
from functools import partial
9-
from typing import Dict, List, Any, Tuple
9+
from typing import Dict, List, Any, Tuple, Set
1010

1111
import confluent_kafka
1212
import confluent_kafka.schema_registry.avro
@@ -16,33 +16,53 @@
1616

1717
logger = logging.getLogger('plan_monitor.detect')
1818

19+
def calculate_plan_age_stats(plan_stats: Dict[str, Any], stats_time: int) -> Tuple[int, int]:
20+
plan_age_seconds = (stats_time - plan_stats['creation_time']) / 1000
21+
last_exec_age_seconds = (stats_time - plan_stats['last_execution_time']) / 1000
22+
return (plan_age_seconds, last_exec_age_seconds)
23+
24+
def is_established_plan(plan_age_seconds: int, last_exec_age_seconds: int) -> bool:
25+
return plan_age_seconds > config.MAX_NEW_PLAN_AGE_SECONDS or \
26+
last_exec_age_seconds > config.MAX_AGE_OF_LAST_EXECUTION_SECONDS
27+
28+
def is_plan_under_investigation(plan_stats: Dict[str, Any], stats_time: int) -> bool:
29+
plan_age_seconds, last_exec_age_seconds = calculate_plan_age_stats(plan_stats, stats_time)
30+
return not is_established_plan(plan_age_seconds, last_exec_age_seconds)
31+
32+
# assess which query plan hashes are recent enough to warrant investigation into badness
33+
# so we can prevent older plans with the same query plan hash from potentially ballooning
34+
# metrics used for determining badness
35+
def get_query_plan_hashes_under_investigation(plans: Dict[str, Dict], stats_time: int) -> Set[str]:
36+
return {plan_stats['worst_statement_query_plan_hash'] for plan_handle, plan_stats in plans.items() \
37+
if is_plan_under_investigation(plan_stats, stats_time)}
1938

2039
def find_bad_plans(plans: Dict[str, Dict], stats_time: int) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]]]:
2140
prior_times, prior_reads, prior_execs, prior_plans_count, prior_last_execution = 0, 0, 0, 0, 0
2241
prior_plans, candidate_bad_plans, bad_plans = [], [], []
2342
prior_worst_plan_hashes = set()
43+
potential_bad_query_plan_hashes = get_query_plan_hashes_under_investigation(plans, stats_time)
2444

2545
for plan_handle, plan_stats in plans.items():
2646
# shouldn't happen bc of the filter in the STATS_DMVS_QUERY SQL query, just being cautious:
2747
if plan_stats['execution_count'] <= 1:
2848
continue
29-
30-
plan_age_seconds = (stats_time - plan_stats['creation_time']) / 1000
31-
last_exec_age_seconds = (stats_time - plan_stats['last_execution_time']) / 1000
49+
50+
plan_age_seconds, last_exec_age_seconds = calculate_plan_age_stats(plan_stats, stats_time)
51+
current_query_plan_hash = plan_stats['worst_statement_query_plan_hash']
3252

3353
if plan_age_seconds < config.MIN_NEW_PLAN_AGE_SECONDS:
3454
# too new; ignore entirely for now. If it's a problem we'll catch it on next poll
3555
continue
36-
elif plan_age_seconds > config.MAX_NEW_PLAN_AGE_SECONDS or \
37-
last_exec_age_seconds > config.MAX_AGE_OF_LAST_EXECUTION_SECONDS:
56+
elif is_established_plan(plan_age_seconds, last_exec_age_seconds):
3857
# this is an old or "established" plan; gather its stats but don't consider it for "badness"
3958
prior_plans.append(plan_stats)
40-
prior_plans_count += 1
41-
prior_times += plan_stats['total_elapsed_time']
42-
prior_reads += plan_stats['total_logical_reads']
43-
prior_execs += plan_stats['execution_count']
44-
prior_worst_plan_hashes.add(plan_stats['worst_statement_query_plan_hash'])
45-
prior_last_execution = max(prior_last_execution, plan_stats['last_execution_time'])
59+
if current_query_plan_hash not in potential_bad_query_plan_hashes:
60+
prior_plans_count += 1
61+
prior_times += plan_stats['total_elapsed_time']
62+
prior_reads += plan_stats['total_logical_reads']
63+
prior_execs += plan_stats['execution_count']
64+
prior_worst_plan_hashes.add(current_query_plan_hash)
65+
prior_last_execution = max(prior_last_execution, plan_stats['last_execution_time'])
4666
continue
4767
elif plan_stats['total_elapsed_time'] < config.MIN_TOTAL_ELAPSED_TIME_SECONDS * 1_000_000 \
4868
and plan_stats['total_logical_reads'] < config.MIN_TOTAL_LOGICAL_READS:

tests/DetectTests.py

Lines changed: 197 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,197 @@
1+
import datetime
2+
from unittest import TestCase, mock
3+
from plan_monitor import config
4+
from plan_monitor.detect import calculate_plan_age_stats, is_established_plan, \
5+
get_query_plan_hashes_under_investigation
6+
7+
def get_time_diff_from_ms(start_time: datetime, seconds_to_subtract: int) -> int:
8+
ts = start_time - datetime.timedelta(seconds=seconds_to_subtract)
9+
return ts.timestamp() * 1000
10+
11+
class CalculatePlanAge(TestCase):
12+
13+
def test_calculate_plan_age(self):
14+
plan_stats = {
15+
"creation_time": 3234325,
16+
"last_execution_time": 324242,
17+
"worst_statement_query_plan_hash": "23424252"
18+
}
19+
dt = datetime.datetime.now()
20+
stats_time = int(dt.strftime("%Y%m%d%H%M%S"))
21+
plan_age_seconds, last_execution_time_seconds = calculate_plan_age_stats(plan_stats, stats_time)
22+
expected_plan_age = (stats_time - plan_stats['creation_time']) / 1000
23+
exected_last_execution_age = (stats_time - plan_stats['last_execution_time']) / 1000
24+
self.assertEqual(expected_plan_age, plan_age_seconds)
25+
self.assertEqual(exected_last_execution_age, last_execution_time_seconds)
26+
27+
class IsEstablishedPlan(TestCase):
28+
29+
# plan is established because plan age is sufficiently old
30+
@mock.patch('plan_monitor.config')
31+
def test_is_established_plan_plan_age(self, conf):
32+
conf.MAX_AGE_OF_LAST_EXECUTION_SECONDS.return_value = 5
33+
conf.MAX_NEW_PLAN_AGE_SECONDS.return_value = 3
34+
dt = datetime.datetime.now()
35+
stats_time = dt.timestamp() * 1000
36+
established_create_ms = get_time_diff_from_ms(dt, (config.MAX_NEW_PLAN_AGE_SECONDS + 1))
37+
established_last_execution_ms = get_time_diff_from_ms(dt, (config.MAX_AGE_OF_LAST_EXECUTION_SECONDS - 1))
38+
plan_stats = {
39+
"creation_time": established_create_ms,
40+
"last_execution_time": established_last_execution_ms,
41+
"worst_statement_query_plan_hash": "23424252"
42+
}
43+
44+
plan_age_seconds, last_execution_time_seconds = calculate_plan_age_stats(plan_stats, stats_time)
45+
is_established = is_established_plan(plan_age_seconds, last_execution_time_seconds)
46+
self.assertTrue(is_established)
47+
48+
# plan is established because plan execution age is sufficiently old
49+
@mock.patch('plan_monitor.config')
50+
def test_is_established_plan_execution_age(self, conf):
51+
conf.MAX_AGE_OF_LAST_EXECUTION_SECONDS.return_value = 5
52+
conf.MAX_NEW_PLAN_AGE_SECONDS.return_value = 3
53+
dt = datetime.datetime.now()
54+
stats_time = dt.timestamp() * 1000
55+
established_create_ms = get_time_diff_from_ms(dt, (config.MAX_NEW_PLAN_AGE_SECONDS - 1))
56+
established_last_execution_ms = get_time_diff_from_ms(dt, (config.MAX_AGE_OF_LAST_EXECUTION_SECONDS + 1))
57+
58+
plan_stats = {
59+
"creation_time": established_create_ms,
60+
"last_execution_time": established_last_execution_ms,
61+
"worst_statement_query_plan_hash": "23424252"
62+
}
63+
64+
plan_age_seconds, last_execution_time_seconds = calculate_plan_age_stats(plan_stats, stats_time)
65+
is_established = is_established_plan(plan_age_seconds, last_execution_time_seconds)
66+
self.assertTrue(is_established)
67+
68+
# plan is established because plan both execution age and plan age are sufficiently old
69+
@mock.patch('plan_monitor.config')
70+
def test_is_established_plan_both(self, conf):
71+
conf.MAX_AGE_OF_LAST_EXECUTION_SECONDS.return_value = 5
72+
conf.MAX_NEW_PLAN_AGE_SECONDS.return_value = 3
73+
dt = datetime.datetime.now()
74+
stats_time = dt.timestamp() * 1000
75+
established_create_ms = get_time_diff_from_ms(dt, (config.MAX_NEW_PLAN_AGE_SECONDS + 1))
76+
established_last_execution_ms = get_time_diff_from_ms(dt, (config.MAX_AGE_OF_LAST_EXECUTION_SECONDS + 1))
77+
plan_stats = {
78+
"creation_time": established_create_ms,
79+
"last_execution_time": established_last_execution_ms,
80+
"worst_statement_query_plan_hash": "23424252"
81+
}
82+
83+
plan_age_seconds, last_execution_time_seconds = calculate_plan_age_stats(plan_stats, stats_time)
84+
is_established = is_established_plan(plan_age_seconds, last_execution_time_seconds)
85+
self.assertTrue(is_established)
86+
87+
# plan is not established
88+
@mock.patch('plan_monitor.config')
89+
def test_is_not_established_plan(self, conf):
90+
conf.MAX_AGE_OF_LAST_EXECUTION_SECONDS.return_value = 5
91+
conf.MAX_NEW_PLAN_AGE_SECONDS.return_value = 3
92+
dt = datetime.datetime.now()
93+
stats_time = dt.timestamp() * 1000
94+
established_create_ms = get_time_diff_from_ms(dt, (config.MAX_NEW_PLAN_AGE_SECONDS - 1))
95+
established_last_execution_ms = get_time_diff_from_ms(dt, (config.MAX_AGE_OF_LAST_EXECUTION_SECONDS - 1))
96+
plan_stats = {
97+
"creation_time": established_create_ms,
98+
"last_execution_time": established_last_execution_ms,
99+
"worst_statement_query_plan_hash": "23424252"
100+
}
101+
102+
plan_age_seconds, last_execution_time_seconds = calculate_plan_age_stats(plan_stats, stats_time)
103+
is_established = is_established_plan(plan_age_seconds, last_execution_time_seconds)
104+
self.assertFalse(is_established)
105+
106+
class GetActiveQueryPlanHashes(TestCase):
107+
108+
# an established query plan hash matches an unestablished plan
109+
@mock.patch('plan_monitor.config')
110+
def test_get_query_plan_hash_under_investigation(self, conf):
111+
conf.MAX_AGE_OF_LAST_EXECUTION_SECONDS.return_value = 5
112+
conf.MAX_NEW_PLAN_AGE_SECONDS.return_value = 3
113+
duplicate_query_plan_hash = '2FCDCA2278D3D2A3'
114+
dt = datetime.datetime.now()
115+
stats_time = dt.timestamp() * 1000
116+
plans = {
117+
"plan-one-duplicate-qp-hash-not-established": {
118+
"creation_time": get_time_diff_from_ms(dt, (config.MAX_NEW_PLAN_AGE_SECONDS - 1)),
119+
"last_execution_time": get_time_diff_from_ms(dt, (config.MAX_AGE_OF_LAST_EXECUTION_SECONDS - 1)),
120+
"worst_statement_query_plan_hash": duplicate_query_plan_hash
121+
},
122+
"not-a-match-established": {
123+
"creation_time": get_time_diff_from_ms(dt, (config.MAX_NEW_PLAN_AGE_SECONDS + 1)),
124+
"last_execution_time": get_time_diff_from_ms(dt, (config.MAX_AGE_OF_LAST_EXECUTION_SECONDS - 1)),
125+
"worst_statement_query_plan_hash": "not-a-query-plan-hash-match"
126+
},
127+
"plan-two-duplicate-qp-hash-is-established": {
128+
"creation_time": get_time_diff_from_ms(dt, (config.MAX_NEW_PLAN_AGE_SECONDS + 2)),
129+
"last_execution_time": get_time_diff_from_ms(dt, (config.MAX_AGE_OF_LAST_EXECUTION_SECONDS + 2)),
130+
"worst_statement_query_plan_hash": duplicate_query_plan_hash
131+
}
132+
}
133+
134+
qp_hashes_under_investigation = get_query_plan_hashes_under_investigation(plans, stats_time)
135+
self.assertEqual(len(qp_hashes_under_investigation), 1)
136+
self.assertEqual(qp_hashes_under_investigation.pop(), duplicate_query_plan_hash)
137+
138+
# returns a query hash plan even if neither duplicate is established
139+
@mock.patch('plan_monitor.config')
140+
def test_get_query_plan_hash_under_investigation_returns_qp_hash(self, conf):
141+
conf.MAX_AGE_OF_LAST_EXECUTION_SECONDS.return_value = 5
142+
conf.MAX_NEW_PLAN_AGE_SECONDS.return_value = 3
143+
duplicate_query_plan_hash = '2FCDCA2278D3D2A3'
144+
dt = datetime.datetime.now()
145+
stats_time = dt.timestamp() * 1000
146+
plans = {
147+
"plan-one-duplicate-qp-hash-not-established": {
148+
"creation_time": get_time_diff_from_ms(dt, (config.MAX_NEW_PLAN_AGE_SECONDS - 1)),
149+
"last_execution_time": get_time_diff_from_ms(dt, (config.MAX_AGE_OF_LAST_EXECUTION_SECONDS - 1)),
150+
"worst_statement_query_plan_hash": duplicate_query_plan_hash
151+
},
152+
"not-a-match-established": {
153+
"creation_time": get_time_diff_from_ms(dt, (config.MAX_NEW_PLAN_AGE_SECONDS + 1)),
154+
"last_execution_time": get_time_diff_from_ms(dt, (config.MAX_AGE_OF_LAST_EXECUTION_SECONDS - 1)),
155+
"worst_statement_query_plan_hash": "not-a-query-plan-hash-match"
156+
},
157+
"plan-two-duplicate-qp-hash-not-established-either": {
158+
"creation_time": get_time_diff_from_ms(dt, (config.MAX_NEW_PLAN_AGE_SECONDS - 1)),
159+
"last_execution_time": get_time_diff_from_ms(dt, (config.MAX_AGE_OF_LAST_EXECUTION_SECONDS - 1)),
160+
"worst_statement_query_plan_hash": duplicate_query_plan_hash
161+
}
162+
}
163+
164+
qp_hashes_under_investigation = get_query_plan_hashes_under_investigation(plans, stats_time)
165+
self.assertEqual(len(qp_hashes_under_investigation), 1)
166+
self.assertEqual(qp_hashes_under_investigation.pop(), duplicate_query_plan_hash)
167+
168+
# does not return a query plan hash if duplicates are both established
169+
@mock.patch('plan_monitor.config')
170+
def test_get_query_plan_hash_under_investigation_doesnt_return_established_plans(self, conf):
171+
conf.MAX_AGE_OF_LAST_EXECUTION_SECONDS.return_value = 5
172+
conf.MAX_NEW_PLAN_AGE_SECONDS.return_value = 3
173+
duplicate_query_plan_hash = '2FCDCA2278D3D2A3'
174+
dt = datetime.datetime.now()
175+
stats_time = dt.timestamp() * 1000
176+
plans = {
177+
"plan-one-duplicate-qp-hash-established": {
178+
"creation_time": get_time_diff_from_ms(dt, (config.MAX_NEW_PLAN_AGE_SECONDS + 1)),
179+
"last_execution_time": get_time_diff_from_ms(dt, (config.MAX_AGE_OF_LAST_EXECUTION_SECONDS + 1)),
180+
"worst_statement_query_plan_hash": duplicate_query_plan_hash
181+
},
182+
"not-a-match-established": {
183+
"creation_time": get_time_diff_from_ms(dt, (config.MAX_NEW_PLAN_AGE_SECONDS + 1)),
184+
"last_execution_time": get_time_diff_from_ms(dt, (config.MAX_AGE_OF_LAST_EXECUTION_SECONDS - 1)),
185+
"worst_statement_query_plan_hash": "not-a-query-plan-hash-match"
186+
},
187+
"plan-two-duplicate-qp-hash-established-either": {
188+
"creation_time": get_time_diff_from_ms(dt, (config.MAX_NEW_PLAN_AGE_SECONDS + 1)),
189+
"last_execution_time": get_time_diff_from_ms(dt, (config.MAX_AGE_OF_LAST_EXECUTION_SECONDS + 1)),
190+
"worst_statement_query_plan_hash": duplicate_query_plan_hash
191+
}
192+
}
193+
194+
qp_hashes_under_investigation = get_query_plan_hashes_under_investigation(plans, stats_time)
195+
self.assertEqual(len(qp_hashes_under_investigation), 0)
196+
197+

tests/__init__.py

Whitespace-only changes.

0 commit comments

Comments
 (0)