Skip to content

Commit d8f5d43

Browse files
authored
Fix autocommit footguns in performance tests
psycopg2 has the following warning related to autocommit:

> By default, any query execution, including a simple SELECT will start a transaction: for long-running programs, if no further action is taken, the session will remain “idle in transaction”, an undesirable condition for several reasons (locks are held by the session, tables bloat…). For long lived scripts, either ensure to terminate a transaction as soon as possible or use an autocommit connection.

In the 2.9 release, psycopg2 also made the following change:

> `with connection` starts a transaction on autocommit transactions too

Some of these connections are indeed long-lived, so the endpoints were retaining large amounts of WAL because an open transaction was pinned in the past. The tests now open each connection explicitly, enable autocommit on it, and close it when done instead of relying on `with connection`.

Link: https://www.psycopg.org/docs/news.html#what-s-new-in-psycopg-2-9
Link: psycopg/psycopg2#941
Signed-off-by: Tristan Partin <[email protected]>
1 parent 2256a57 commit d8f5d43

File tree

2 files changed

+99
-75
lines changed

2 files changed

+99
-75
lines changed

test_runner/performance/test_logical_replication.py

Lines changed: 61 additions & 52 deletions
Original file line number | Diff line number | Diff line change
@@ -149,12 +149,16 @@ def test_subscriber_lag(
149149
check_pgbench_still_running(pub_workload, "pub")
150150
check_pgbench_still_running(sub_workload, "sub")
151151

152-
with (
153-
psycopg2.connect(pub_connstr) as pub_conn,
154-
psycopg2.connect(sub_connstr) as sub_conn,
155-
):
156-
with pub_conn.cursor() as pub_cur, sub_conn.cursor() as sub_cur:
157-
lag = measure_logical_replication_lag(sub_cur, pub_cur)
152+
pub_conn = psycopg2.connect(pub_connstr)
153+
sub_conn = psycopg2.connect(sub_connstr)
154+
pub_conn.autocommit = True
155+
sub_conn.autocommit = True
156+
157+
with pub_conn.cursor() as pub_cur, sub_conn.cursor() as sub_cur:
158+
lag = measure_logical_replication_lag(sub_cur, pub_cur)
159+
160+
pub_conn.close()
161+
sub_conn.close()
158162

159163
log.info(f"Replica lagged behind master by {lag} seconds")
160164
zenbenchmark.record("replica_lag", lag, "s", MetricReport.LOWER_IS_BETTER)
@@ -206,6 +210,7 @@ def test_publisher_restart(
206210
sub_conn = psycopg2.connect(sub_connstr)
207211
pub_conn.autocommit = True
208212
sub_conn.autocommit = True
213+
209214
with pub_conn.cursor() as pub_cur, sub_conn.cursor() as sub_cur:
210215
pub_cur.execute("SELECT 1 FROM pg_catalog.pg_publication WHERE pubname = 'pub1'")
211216
pub_exists = len(pub_cur.fetchall()) != 0
@@ -222,6 +227,7 @@ def test_publisher_restart(
222227
sub_cur.execute(f"create subscription sub1 connection '{pub_connstr}' publication pub1")
223228

224229
initial_sync_lag = measure_logical_replication_lag(sub_cur, pub_cur)
230+
225231
pub_conn.close()
226232
sub_conn.close()
227233

@@ -248,12 +254,17 @@ def test_publisher_restart(
248254
["pgbench", "-c10", pgbench_duration, "-Mprepared"],
249255
env=pub_env,
250256
)
251-
with (
252-
psycopg2.connect(pub_connstr) as pub_conn,
253-
psycopg2.connect(sub_connstr) as sub_conn,
254-
):
255-
with pub_conn.cursor() as pub_cur, sub_conn.cursor() as sub_cur:
256-
lag = measure_logical_replication_lag(sub_cur, pub_cur)
257+
258+
pub_conn = psycopg2.connect(pub_connstr)
259+
sub_conn = psycopg2.connect(sub_connstr)
260+
pub_conn.autocommit = True
261+
sub_conn.autocommit = True
262+
263+
with pub_conn.cursor() as pub_cur, sub_conn.cursor() as sub_cur:
264+
lag = measure_logical_replication_lag(sub_cur, pub_cur)
265+
266+
pub_conn.close()
267+
sub_conn.close()
257268

258269
log.info(f"Replica lagged behind master by {lag} seconds")
259270
zenbenchmark.record("replica_lag", lag, "s", MetricReport.LOWER_IS_BETTER)
@@ -288,58 +299,56 @@ def test_snap_files(
288299
env = benchmark_project_pub.pgbench_env
289300
connstr = benchmark_project_pub.connstr
290301

291-
with psycopg2.connect(connstr) as conn:
292-
conn.autocommit = True
293-
with conn.cursor() as cur:
294-
cur.execute("SELECT rolsuper FROM pg_roles WHERE rolname = 'neondb_owner'")
295-
is_super = cast("bool", cur.fetchall()[0][0])
296-
assert is_super, "This benchmark won't work if we don't have superuser"
302+
conn = psycopg2.connect(connstr)
303+
conn.autocommit = True
304+
305+
with conn.cursor() as cur:
306+
cur.execute("SELECT rolsuper FROM pg_roles WHERE rolname = 'neondb_owner'")
307+
is_super = cast("bool", cur.fetchall()[0][0])
308+
assert is_super, "This benchmark won't work if we don't have superuser"
309+
310+
conn.close()
297311

298312
pg_bin.run_capture(["pgbench", "-i", "-I", "dtGvp", "-s100"], env=env)
299313

300314
conn = psycopg2.connect(connstr)
301315
conn.autocommit = True
302-
cur = conn.cursor()
303-
cur.execute("ALTER SYSTEM SET neon.logical_replication_max_snap_files = -1")
304-
305-
with psycopg2.connect(connstr) as conn:
306-
conn.autocommit = True
307-
with conn.cursor() as cur:
308-
cur.execute("SELECT pg_reload_conf()")
309-
310-
with psycopg2.connect(connstr) as conn:
311-
conn.autocommit = True
312-
with conn.cursor() as cur:
313-
cur.execute(
314-
"""
315-
DO $$
316-
BEGIN
317-
IF EXISTS (
318-
SELECT 1
319-
FROM pg_replication_slots
320-
WHERE slot_name = 'slotter'
321-
) THEN
322-
PERFORM pg_drop_replication_slot('slotter');
323-
END IF;
324-
END $$;
316+
317+
with conn.cursor() as cur:
318+
cur.execute(
325319
"""
326-
)
327-
cur.execute("SELECT pg_create_logical_replication_slot('slotter', 'test_decoding')")
320+
DO $$
321+
BEGIN
322+
IF EXISTS (
323+
SELECT 1
324+
FROM pg_replication_slots
325+
WHERE slot_name = 'slotter'
326+
) THEN
327+
PERFORM pg_drop_replication_slot('slotter');
328+
END IF;
329+
END $$;
330+
"""
331+
)
332+
cur.execute("SELECT pg_create_logical_replication_slot('slotter', 'test_decoding')")
333+
334+
conn.close()
328335

329336
workload = pg_bin.run_nonblocking(["pgbench", "-c10", pgbench_duration, "-Mprepared"], env=env)
330337
try:
331338
start = time.time()
332339
prev_measurement = time.time()
333340
while time.time() - start < test_duration_min * 60:
334-
with psycopg2.connect(connstr) as conn:
335-
with conn.cursor() as cur:
336-
cur.execute(
337-
"SELECT count(*) FROM (SELECT pg_log_standby_snapshot() FROM generate_series(1, 10000) g) s"
338-
)
339-
check_pgbench_still_running(workload)
340-
cur.execute(
341-
"SELECT pg_replication_slot_advance('slotter', pg_current_wal_lsn())"
342-
)
341+
conn = psycopg2.connect(connstr)
342+
conn.autocommit = True
343+
344+
with conn.cursor() as cur:
345+
cur.execute(
346+
"SELECT count(*) FROM (SELECT pg_log_standby_snapshot() FROM generate_series(1, 10000) g) s"
347+
)
348+
check_pgbench_still_running(workload)
349+
cur.execute("SELECT pg_replication_slot_advance('slotter', pg_current_wal_lsn())")
350+
351+
conn.close()
343352

344353
# Measure storage
345354
if time.time() - prev_measurement > test_interval_min * 60:

test_runner/performance/test_physical_replication.py

Lines changed: 38 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -102,15 +102,21 @@ def test_ro_replica_lag(
102102
check_pgbench_still_running(master_workload)
103103
check_pgbench_still_running(replica_workload)
104104
time.sleep(sync_interval_min * 60)
105+
106+
conn_master = psycopg2.connect(master_connstr)
107+
conn_replica = psycopg2.connect(replica_connstr)
108+
conn_master.autocommit = True
109+
conn_replica.autocommit = True
110+
105111
with (
106-
psycopg2.connect(master_connstr) as conn_master,
107-
psycopg2.connect(replica_connstr) as conn_replica,
112+
conn_master.cursor() as cur_master,
113+
conn_replica.cursor() as cur_replica,
108114
):
109-
with (
110-
conn_master.cursor() as cur_master,
111-
conn_replica.cursor() as cur_replica,
112-
):
113-
lag = measure_replication_lag(cur_master, cur_replica)
115+
lag = measure_replication_lag(cur_master, cur_replica)
116+
117+
conn_master.close()
118+
conn_replica.close()
119+
114120
log.info(f"Replica lagged behind master by {lag} seconds")
115121
zenbenchmark.record("replica_lag", lag, "s", MetricReport.LOWER_IS_BETTER)
116122
finally:
@@ -219,11 +225,15 @@ def test_replication_start_stop(
219225
pg_bin.run_capture(["pgbench", "-i", "-I", "dtGvp", "-s10"], env=master_env)
220226

221227
# Sync replicas
222-
with psycopg2.connect(master_connstr) as conn_master:
223-
with conn_master.cursor() as cur_master:
224-
for i in range(num_replicas):
225-
conn_replica = psycopg2.connect(replica_connstr[i])
226-
measure_replication_lag(cur_master, conn_replica.cursor())
228+
conn_master = psycopg2.connect(master_connstr)
229+
conn_master.autocommit = True
230+
231+
with conn_master.cursor() as cur_master:
232+
for i in range(num_replicas):
233+
conn_replica = psycopg2.connect(replica_connstr[i])
234+
measure_replication_lag(cur_master, conn_replica.cursor())
235+
236+
conn_master.close()
227237

228238
master_pgbench = pg_bin.run_nonblocking(
229239
[
@@ -277,17 +287,22 @@ def replica_enabled(iconfig: int = iconfig):
277287

278288
time.sleep(configuration_test_time_sec)
279289

280-
with psycopg2.connect(master_connstr) as conn_master:
281-
with conn_master.cursor() as cur_master:
282-
for ireplica in range(num_replicas):
283-
replica_conn = psycopg2.connect(replica_connstr[ireplica])
284-
lag = measure_replication_lag(cur_master, replica_conn.cursor())
285-
zenbenchmark.record(
286-
f"Replica {ireplica} lag", lag, "s", MetricReport.LOWER_IS_BETTER
287-
)
288-
log.info(
289-
f"Replica {ireplica} lagging behind master by {lag} seconds after configuration {iconfig:>b}"
290-
)
290+
conn_master = psycopg2.connect(master_connstr)
291+
conn_master.autocommit = True
292+
293+
with conn_master.cursor() as cur_master:
294+
for ireplica in range(num_replicas):
295+
replica_conn = psycopg2.connect(replica_connstr[ireplica])
296+
lag = measure_replication_lag(cur_master, replica_conn.cursor())
297+
zenbenchmark.record(
298+
f"Replica {ireplica} lag", lag, "s", MetricReport.LOWER_IS_BETTER
299+
)
300+
log.info(
301+
f"Replica {ireplica} lagging behind master by {lag} seconds after configuration {iconfig:>b}"
302+
)
303+
304+
conn_master.close()
305+
291306
master_pgbench.terminate()
292307
except Exception as e:
293308
error_occurred = True

0 commit comments

Comments
 (0)