Skip to content
This repository was archived by the owner on Apr 26, 2024. It is now read-only.

Commit fff9b95

Browse files
author
David Robertson
authored
Generate separate snapshots for logical databases (#13792)
* Generate separate snapshots for sqlite, postgres and common
* Cleanup postgres dbs in the TRAP
* Say which logical DB we're applying updates to
* Run background updates on the state DB
* Add new option for accepting a SCHEMA_NUMBER
1 parent 42d261c commit fff9b95

File tree

4 files changed

+140
-46
lines changed

4 files changed

+140
-46
lines changed

changelog.d/13792.misc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Update the script which makes full schema dumps.

scripts-dev/make_full_schema.sh

Lines changed: 125 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -2,23 +2,16 @@
22
#
33
# This script generates SQL files for creating a brand new Synapse DB with the latest
44
# schema, on both SQLite3 and Postgres.
5-
#
6-
# It does so by having Synapse generate an up-to-date SQLite DB, then running
7-
# synapse_port_db to convert it to Postgres. It then dumps the contents of both.
85

96
export PGHOST="localhost"
10-
POSTGRES_DB_NAME="synapse_full_schema.$$"
11-
12-
SQLITE_SCHEMA_FILE="schema.sql.sqlite"
13-
SQLITE_ROWS_FILE="rows.sql.sqlite"
14-
POSTGRES_SCHEMA_FILE="full.sql.postgres"
15-
POSTGRES_ROWS_FILE="rows.sql.postgres"
16-
7+
POSTGRES_MAIN_DB_NAME="synapse_full_schema_main.$$"
8+
POSTGRES_COMMON_DB_NAME="synapse_full_schema_common.$$"
9+
POSTGRES_STATE_DB_NAME="synapse_full_schema_state.$$"
1710
REQUIRED_DEPS=("matrix-synapse" "psycopg2")
1811

1912
usage() {
2013
echo
21-
echo "Usage: $0 -p <postgres_username> -o <path> [-c] [-n] [-h]"
14+
echo "Usage: $0 -p <postgres_username> -o <path> [-c] [-n <schema number>] [-h]"
2215
echo
2316
echo "-p <postgres_username>"
2417
echo " Username to connect to local postgres instance. The password will be requested"
@@ -27,11 +20,16 @@ usage() {
2720
echo " CI mode. Prints every command that the script runs."
2821
echo "-o <path>"
2922
echo " Directory to output full schema files to."
23+
echo "-n <schema number>"
24+
echo " Schema number for the new snapshot. Used to set the location of files within "
25+
echo " the output directory, mimicking that of synapse/storage/schemas."
26+
echo " Defaults to 9999."
3027
echo "-h"
3128
echo " Display this help text."
3229
}
3330

34-
while getopts "p:co:h" opt; do
31+
SCHEMA_NUMBER="9999"
32+
while getopts "p:co:hn:" opt; do
3533
case $opt in
3634
p)
3735
export PGUSER=$OPTARG
@@ -48,6 +46,9 @@ while getopts "p:co:h" opt; do
4846
usage
4947
exit
5048
;;
49+
n)
50+
SCHEMA_NUMBER="$OPTARG"
51+
;;
5152
\?)
5253
echo "ERROR: Invalid option: -$OPTARG" >&2
5354
usage
@@ -95,12 +96,21 @@ cd "$(dirname "$0")/.."
9596
TMPDIR=$(mktemp -d)
9697
KEY_FILE=$TMPDIR/test.signing.key # default Synapse signing key path
9798
SQLITE_CONFIG=$TMPDIR/sqlite.conf
98-
SQLITE_DB=$TMPDIR/homeserver.db
99+
SQLITE_MAIN_DB=$TMPDIR/main.db
100+
SQLITE_STATE_DB=$TMPDIR/state.db
101+
SQLITE_COMMON_DB=$TMPDIR/common.db
99102
POSTGRES_CONFIG=$TMPDIR/postgres.conf
100103

101104
# Ensure these files are deleted on script exit
102-
# TODO: the trap should also drop the temp postgres DB
103-
trap 'rm -rf $TMPDIR' EXIT
105+
cleanup() {
106+
echo "Cleaning up temporary sqlite database and config files..."
107+
rm -r "$TMPDIR"
108+
echo "Cleaning up temporary Postgres database..."
109+
dropdb --if-exists "$POSTGRES_COMMON_DB_NAME"
110+
dropdb --if-exists "$POSTGRES_MAIN_DB_NAME"
111+
dropdb --if-exists "$POSTGRES_STATE_DB_NAME"
112+
}
113+
trap 'cleanup' EXIT
104114

105115
cat > "$SQLITE_CONFIG" <<EOF
106116
server_name: "test"
@@ -110,10 +120,22 @@ macaroon_secret_key: "abcde"
110120
111121
report_stats: false
112122
113-
database:
114-
name: "sqlite3"
115-
args:
116-
database: "$SQLITE_DB"
123+
databases:
124+
common:
125+
name: "sqlite3"
126+
data_stores: []
127+
args:
128+
database: "$SQLITE_COMMON_DB"
129+
main:
130+
name: "sqlite3"
131+
data_stores: ["main"]
132+
args:
133+
database: "$SQLITE_MAIN_DB"
134+
state:
135+
name: "sqlite3"
136+
data_stores: ["state"]
137+
args:
138+
database: "$SQLITE_STATE_DB"
117139
118140
# Suppress the key server warning.
119141
trusted_key_servers: []
@@ -127,13 +149,32 @@ macaroon_secret_key: "abcde"
127149
128150
report_stats: false
129151
130-
database:
131-
name: "psycopg2"
132-
args:
133-
user: "$PGUSER"
134-
host: "$PGHOST"
135-
password: "$PGPASSWORD"
136-
database: "$POSTGRES_DB_NAME"
152+
databases:
153+
common:
154+
name: "psycopg2"
155+
data_stores: []
156+
args:
157+
user: "$PGUSER"
158+
host: "$PGHOST"
159+
password: "$PGPASSWORD"
160+
database: "$POSTGRES_COMMON_DB_NAME"
161+
main:
162+
name: "psycopg2"
163+
data_stores: ["main"]
164+
args:
165+
user: "$PGUSER"
166+
host: "$PGHOST"
167+
password: "$PGPASSWORD"
168+
database: "$POSTGRES_MAIN_DB_NAME"
169+
state:
170+
name: "psycopg2"
171+
data_stores: ["state"]
172+
args:
173+
user: "$PGUSER"
174+
host: "$PGHOST"
175+
password: "$PGPASSWORD"
176+
database: "$POSTGRES_STATE_DB_NAME"
177+
137178
138179
# Suppress the key server warning.
139180
trusted_key_servers: []
@@ -148,33 +189,76 @@ echo "Running db background jobs..."
148189
synapse/_scripts/update_synapse_database.py --database-config "$SQLITE_CONFIG" --run-background-updates
149190

150191
# Create the PostgreSQL database.
151-
echo "Creating postgres database..."
152-
createdb --lc-collate=C --lc-ctype=C --template=template0 "$POSTGRES_DB_NAME"
192+
echo "Creating postgres databases..."
193+
createdb --lc-collate=C --lc-ctype=C --template=template0 "$POSTGRES_COMMON_DB_NAME"
194+
createdb --lc-collate=C --lc-ctype=C --template=template0 "$POSTGRES_MAIN_DB_NAME"
195+
createdb --lc-collate=C --lc-ctype=C --template=template0 "$POSTGRES_STATE_DB_NAME"
153196

154197
echo "Running db background jobs..."
155198
synapse/_scripts/update_synapse_database.py --database-config "$POSTGRES_CONFIG" --run-background-updates
156199

157200

158-
# Delete schema_version, applied_schema_deltas and applied_module_schemas tables
159-
# Also delete any shadow tables from fts4
160201
echo "Dropping unwanted db tables..."
161-
SQL="
202+
203+
# Some common tables are created and updated by Synapse itself and do not belong in the
204+
# schema.
205+
DROP_APP_MANAGED_TABLES="
162206
DROP TABLE schema_version;
207+
DROP TABLE schema_compat_version;
163208
DROP TABLE applied_schema_deltas;
164209
DROP TABLE applied_module_schemas;
165210
"
166-
sqlite3 "$SQLITE_DB" <<< "$SQL"
167-
psql "$POSTGRES_DB_NAME" -w <<< "$SQL"
211+
# Other common tables are not created by Synapse and do belong in the schema.
212+
# TODO: we could derive DROP_COMMON_TABLES from the dump of the common-only DB. But
213+
# since there's only one table there, I haven't bothered to do so.
214+
DROP_COMMON_TABLES="$DROP_APP_MANAGED_TABLES
215+
DROP TABLE background_updates;
216+
"
217+
218+
sqlite3 "$SQLITE_COMMON_DB" <<< "$DROP_APP_MANAGED_TABLES"
219+
sqlite3 "$SQLITE_MAIN_DB" <<< "$DROP_COMMON_TABLES"
220+
sqlite3 "$SQLITE_STATE_DB" <<< "$DROP_COMMON_TABLES"
221+
psql "$POSTGRES_COMMON_DB_NAME" -w <<< "$DROP_APP_MANAGED_TABLES"
222+
psql "$POSTGRES_MAIN_DB_NAME" -w <<< "$DROP_COMMON_TABLES"
223+
psql "$POSTGRES_STATE_DB_NAME" -w <<< "$DROP_COMMON_TABLES"
224+
225+
# For Reasons(TM), SQLite's `.schema` also dumps out "shadow tables", the implementation
226+
# details behind full text search tables. Omit these from the dumps.
227+
228+
sqlite3 "$SQLITE_MAIN_DB" <<< "
229+
DROP TABLE event_search_content;
230+
DROP TABLE event_search_segments;
231+
DROP TABLE event_search_segdir;
232+
DROP TABLE event_search_docsize;
233+
DROP TABLE event_search_stat;
234+
DROP TABLE user_directory_search_content;
235+
DROP TABLE user_directory_search_segments;
236+
DROP TABLE user_directory_search_segdir;
237+
DROP TABLE user_directory_search_docsize;
238+
DROP TABLE user_directory_search_stat;
239+
"
168240

169-
echo "Dumping SQLite3 schema to '$OUTPUT_DIR/$SQLITE_SCHEMA_FILE' and '$OUTPUT_DIR/$SQLITE_ROWS_FILE'..."
170-
sqlite3 "$SQLITE_DB" ".schema --indent" > "$OUTPUT_DIR/$SQLITE_SCHEMA_FILE"
171-
sqlite3 "$SQLITE_DB" ".dump --data-only --nosys" > "$OUTPUT_DIR/$SQLITE_ROWS_FILE"
241+
echo "Dumping SQLite3 schema..."
242+
243+
mkdir -p "$OUTPUT_DIR/"{common,main,state}"/full_schema/$SCHEMA_NUMBER"
244+
sqlite3 "$SQLITE_COMMON_DB" ".schema --indent" > "$OUTPUT_DIR/common/full_schema/$SCHEMA_NUMBER/full.sql.sqlite"
245+
sqlite3 "$SQLITE_COMMON_DB" ".dump --data-only --nosys" >> "$OUTPUT_DIR/common/full_schema/$SCHEMA_NUMBER/full.sql.sqlite"
246+
sqlite3 "$SQLITE_MAIN_DB" ".schema --indent" > "$OUTPUT_DIR/main/full_schema/$SCHEMA_NUMBER/full.sql.sqlite"
247+
sqlite3 "$SQLITE_MAIN_DB" ".dump --data-only --nosys" >> "$OUTPUT_DIR/main/full_schema/$SCHEMA_NUMBER/full.sql.sqlite"
248+
sqlite3 "$SQLITE_STATE_DB" ".schema --indent" > "$OUTPUT_DIR/state/full_schema/$SCHEMA_NUMBER/full.sql.sqlite"
249+
sqlite3 "$SQLITE_STATE_DB" ".dump --data-only --nosys" >> "$OUTPUT_DIR/state/full_schema/$SCHEMA_NUMBER/full.sql.sqlite"
250+
251+
cleanup_pg_schema() {
252+
sed -e '/^$/d' -e '/^--/d' -e 's/public\.//g' -e '/^SET /d' -e '/^SELECT /d'
253+
}
172254

173-
echo "Dumping Postgres schema to '$OUTPUT_DIR/$POSTGRES_SCHEMA_FILE' and '$OUTPUT_DIR/$POSTGRES_ROWS_FILE'..."
174-
pg_dump --format=plain --schema-only --no-tablespaces --no-acl --no-owner "$POSTGRES_DB_NAME" | sed -e '/^$/d' -e '/^--/d' -e 's/public\.//g' -e '/^SET /d' -e '/^SELECT /d' > "$OUTPUT_DIR/$POSTGRES_SCHEMA_FILE"
175-
pg_dump --format=plain --data-only --inserts --no-tablespaces --no-acl --no-owner "$POSTGRES_DB_NAME" | sed -e '/^$/d' -e '/^--/d' -e 's/public\.//g' -e '/^SET /d' -e '/^SELECT /d' > "$OUTPUT_DIR/$POSTGRES_ROWS_FILE"
255+
echo "Dumping Postgres schema..."
176256

177-
echo "Cleaning up temporary Postgres database..."
178-
dropdb $POSTGRES_DB_NAME
257+
pg_dump --format=plain --schema-only --no-tablespaces --no-acl --no-owner "$POSTGRES_COMMON_DB_NAME" | cleanup_pg_schema > "$OUTPUT_DIR/common/full_schema/$SCHEMA_NUMBER/full.sql.postgres"
258+
pg_dump --format=plain --data-only --inserts --no-tablespaces --no-acl --no-owner "$POSTGRES_COMMON_DB_NAME" | cleanup_pg_schema >> "$OUTPUT_DIR/common/full_schema/$SCHEMA_NUMBER/full.sql.postgres"
259+
pg_dump --format=plain --schema-only --no-tablespaces --no-acl --no-owner "$POSTGRES_MAIN_DB_NAME" | cleanup_pg_schema > "$OUTPUT_DIR/main/full_schema/$SCHEMA_NUMBER/full.sql.postgres"
260+
pg_dump --format=plain --data-only --inserts --no-tablespaces --no-acl --no-owner "$POSTGRES_MAIN_DB_NAME" | cleanup_pg_schema >> "$OUTPUT_DIR/main/full_schema/$SCHEMA_NUMBER/full.sql.postgres"
261+
pg_dump --format=plain --schema-only --no-tablespaces --no-acl --no-owner "$POSTGRES_STATE_DB_NAME" | cleanup_pg_schema > "$OUTPUT_DIR/state/full_schema/$SCHEMA_NUMBER/full.sql.postgres"
262+
pg_dump --format=plain --data-only --inserts --no-tablespaces --no-acl --no-owner "$POSTGRES_STATE_DB_NAME" | cleanup_pg_schema >> "$OUTPUT_DIR/state/full_schema/$SCHEMA_NUMBER/full.sql.postgres"
179263

180264
echo "Done! Files dumped to: $OUTPUT_DIR"

synapse/_scripts/update_synapse_database.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -48,10 +48,13 @@ def __init__(self, config: HomeServerConfig):
4848

4949

5050
def run_background_updates(hs: HomeServer) -> None:
51-
store = hs.get_datastores().main
51+
main = hs.get_datastores().main
52+
state = hs.get_datastores().state
5253

5354
async def run_background_updates() -> None:
54-
await store.db_pool.updates.run_background_updates(sleep=False)
55+
await main.db_pool.updates.run_background_updates(sleep=False)
56+
if state:
57+
await state.db_pool.updates.run_background_updates(sleep=False)
5558
# Stop the reactor to exit the script once every background update is run.
5659
reactor.stop()
5760

@@ -97,8 +100,11 @@ def main() -> None:
97100
# Load, process and sanity-check the config.
98101
hs_config = yaml.safe_load(args.database_config)
99102

100-
if "database" not in hs_config:
101-
sys.stderr.write("The configuration file must have a 'database' section.\n")
103+
if "database" not in hs_config and "databases" not in hs_config:
104+
sys.stderr.write(
105+
"The configuration file must have a 'database' or 'databases' section. "
106+
"See https://matrix-org.github.io/synapse/latest/usage/configuration/config_documentation.html#database"
107+
)
102108
sys.exit(4)
103109

104110
config = HomeServerConfig()

synapse/storage/background_updates.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -285,7 +285,10 @@ async def run_background_updates(self, sleep: bool) -> None:
285285
back_to_back_failures = 0
286286

287287
try:
288-
logger.info("Starting background schema updates")
288+
logger.info(
289+
"Starting background schema updates for database %s",
290+
self._database_name,
291+
)
289292
while self.enabled:
290293
try:
291294
result = await self.do_next_background_update(sleep)

0 commit comments

Comments
 (0)