Skip to content

Commit a887107

Browse files
authored
Add documentation (#41)
Added documentation to lib.rs and database.rs files
1 parent 013080f commit a887107

File tree

2 files changed

+122
-40
lines changed

2 files changed

+122
-40
lines changed

src/database.rs

Lines changed: 47 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,21 @@ use std::{borrow::Cow, collections::BTreeMap, fmt};
2121

2222
use super::StateGroupEntry;
2323

24-
/// Fetch the entries in state_groups_state (and their prev groups) for the
25-
/// given `room_id` by connecting to the postgres database at `db_url`.
24+
/// Fetch the entries in state_groups_state (and their prev groups) for a
25+
/// specific room.
26+
///
27+
/// - Connects to the database
28+
/// - Fetches rows with group id lower than or equal to max (if specified)
29+
/// - Recursively searches for missing predecessors and adds those
30+
///
31+
/// # Arguments
32+
///
33+
/// * `room_id` - The ID of the room in the database
34+
/// * `db_url` - The URL of a Postgres database. This should be of the
35+
/// form: "postgresql://user:pass@domain:port/database"
36+
/// * `max_state_group` - If specified, then only fetch the entries for state
37+
/// groups lower than or equal to this number. (N.B. all
38+
/// predecessors are also fetched)
2639
pub fn get_data_from_db(
2740
db_url: &str,
2841
room_id: &str,
@@ -43,6 +56,9 @@ pub fn get_data_from_db(
4356
// in our DB queries, so we have to fetch any missing groups explicitly.
4457
// Since the returned groups may themselves reference groups we don't have,
4558
// we need to do this recursively until we don't find any more missing.
59+
//
60+
// N.B. This does NOT currently fetch the deltas for the missing groups!
61+
// With a carefully chosen max_state_group this might cause issues...?
4662
loop {
4763
let mut missing_sgs: Vec<_> = state_group_map
4864
.iter()
@@ -76,13 +92,25 @@ pub fn get_data_from_db(
7692
state_group_map
7793
}
7894

79-
/// Fetch the entries in state_groups_state (and their prev groups) for the
80-
/// given `room_id` by fetching all state with the given `room_id`.
95+
/// Fetch the entries in state_groups_state and immediate predecessors for
96+
/// a specific room.
97+
///
98+
/// - Fetches rows with group id lower than or equal to max (if specified)
99+
/// - Stores the group id, predecessor id and deltas into a map
100+
///
101+
/// # Arguments
102+
///
103+
/// * `client` - A Postgres client to make requests with
104+
/// * `room_id` - The ID of the room in the database
105+
/// * `max_state_group` - If specified, then only fetch the entries for state
106+
/// groups lower than or equal to this number. (N.B. doesn't
107+
/// fetch IMMEDIATE predecessors if ID is above this number)
81108
fn get_initial_data_from_db(
82109
client: &mut Client,
83110
room_id: &str,
84111
max_state_group: Option<i64>,
85112
) -> BTreeMap<i64, StateGroupEntry> {
113+
// Query to get id, predecessor and delta for each state group
86114
let sql = r#"
87115
SELECT m.id, prev_state_group, type, state_key, s.event_id
88116
FROM state_groups AS m
@@ -91,6 +119,8 @@ fn get_initial_data_from_db(
91119
WHERE m.room_id = $1
92120
"#;
93121

122+
// Adds additional constraint if a max_state_group has been specified
123+
// Then sends query to the database
94124
let mut rows = if let Some(s) = max_state_group {
95125
let params: Vec<&dyn ToSql> = vec![&room_id, &s];
96126
client.query_raw(format!(r"{} AND m.id <= $2", sql).as_str(), params)
@@ -99,6 +129,8 @@ fn get_initial_data_from_db(
99129
}
100130
.unwrap();
101131

132+
// Copy the data from the database into a map
133+
102134
let mut state_group_map: BTreeMap<i64, StateGroupEntry> = BTreeMap::new();
103135

104136
let pb = ProgressBar::new_spinner();
@@ -108,10 +140,13 @@ fn get_initial_data_from_db(
108140
pb.enable_steady_tick(100);
109141

110142
while let Some(row) = rows.next().unwrap() {
143+
// The row in the map to copy the data to
111144
let entry = state_group_map.entry(row.get(0)).or_default();
112145

146+
// Save the predecessor (this may already be there)
113147
entry.prev_state_group = row.get(1);
114148

149+
// Copy the single delta from the predecessor stored in this row
115150
if let Some(etype) = row.get::<_, Option<String>>(2) {
116151
entry.state_map.insert(
117152
&etype,
@@ -129,7 +164,14 @@ fn get_initial_data_from_db(
129164
state_group_map
130165
}
131166

132-
/// Get any missing state groups from the database
167+
/// Finds the predecessors of missing state groups
168+
///
169+
/// N.B. this does NOT find their deltas
170+
///
171+
/// # Arguments
172+
///
173+
/// * `client` - A Postgres client to make requests with
174+
/// * `missing_sgs` - An array of missing state_group ids
133175
fn get_missing_from_db(client: &mut Client, missing_sgs: &[i64]) -> BTreeMap<i64, StateGroupEntry> {
134176
let mut rows = client
135177
.query_raw(

src/lib.rs

Lines changed: 75 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -16,15 +16,9 @@
1616
//! Synapse instance's database. Specifically, it aims to reduce the number of
1717
//! rows that a given room takes up in the `state_groups_state` table.
1818
19-
mod compressor;
20-
mod database;
21-
2219
#[global_allocator]
2320
static GLOBAL: jemallocator::Jemalloc = jemallocator::Jemalloc;
2421

25-
use compressor::Compressor;
26-
use database::PGEscape;
27-
2822
use clap::{
2923
crate_authors, crate_description, crate_name, crate_version, value_t_or_exit, App, Arg,
3024
};
@@ -34,6 +28,12 @@ use state_map::StateMap;
3428
use std::{collections::BTreeMap, fs::File, io::Write, str::FromStr};
3529
use string_cache::DefaultAtom as Atom;
3630

31+
mod compressor;
32+
mod database;
33+
34+
use compressor::Compressor;
35+
use database::PGEscape;
36+
3737
/// An entry for a state group. Consists of an (optional) previous group and the
3838
/// delta from that previous group (or the full state if no previous group)
3939
#[derive(Default, Debug, Clone, PartialEq, Eq)]
@@ -42,33 +42,6 @@ pub struct StateGroupEntry {
4242
state_map: StateMap<Atom>,
4343
}
4444

45-
/// Gets the full state for a given group from the map (of deltas)
46-
fn collapse_state_maps(map: &BTreeMap<i64, StateGroupEntry>, state_group: i64) -> StateMap<Atom> {
47-
let mut entry = &map[&state_group];
48-
let mut state_map = StateMap::new();
49-
50-
let mut stack = vec![state_group];
51-
52-
while let Some(prev_state_group) = entry.prev_state_group {
53-
stack.push(prev_state_group);
54-
if !map.contains_key(&prev_state_group) {
55-
panic!("Missing {}", prev_state_group);
56-
}
57-
entry = &map[&prev_state_group];
58-
}
59-
60-
for sg in stack.iter().rev() {
61-
state_map.extend(
62-
map[&sg]
63-
.state_map
64-
.iter()
65-
.map(|((t, s), e)| ((t, s), e.clone())),
66-
);
67-
}
68-
69-
state_map
70-
}
71-
7245
/// Helper struct for parsing the `level_sizes` argument.
7346
struct LevelSizes(Vec<usize>);
7447

@@ -89,6 +62,7 @@ impl FromStr for LevelSizes {
8962
}
9063
}
9164

65+
/// Contains configuration information for this run of the compressor
9266
pub struct Config {
9367
db_url: String,
9468
output_file: Option<File>,
@@ -100,6 +74,7 @@ pub struct Config {
10074
}
10175

10276
impl Config {
77+
/// Build up config from command line arguments
10378
pub fn parse_arguments() -> Config {
10479
let matches = App::new(crate_name!())
10580
.version(crate_version!())
@@ -199,9 +174,22 @@ impl Config {
199174
}
200175
}
201176

202-
pub fn run(mut config: Config) {
203-
// let mut config = Config::parse_arguments();
177+
/// Runs through the steps of the compression:
178+
///
179+
/// - Fetches current state groups for a room and their predecessors
180+
/// - Outputs #state groups and #lines in table they occupy
181+
/// - Runs the compressor to produce a new predecessor mapping
182+
/// - Outputs #lines in table that the new mapping would occupy
183+
/// - Outputs info about how the compressor got on
184+
/// - Checks that number of lines saved is greater than threshold
185+
/// - Ensures new mapping doesn't affect actual state contents
186+
/// - Produces SQL code to carry out changes and saves it to file
187+
///
188+
/// # Arguments
189+
///
190+
/// * `config: Config` - A Config struct that controls the run
204191
192+
pub fn run(mut config: Config) {
205193
// First we need to get the current state groups
206194
println!("Fetching state from DB for room '{}'...", config.room_id);
207195

@@ -272,6 +260,17 @@ pub fn run(mut config: Config) {
272260
output_sql(&mut config, &state_group_map, &new_state_group_map);
273261
}
274262

263+
/// Produces SQL code to carry out changes and saves it to file
264+
///
265+
/// # Arguments
266+
///
267+
/// * `config` - A Config struct that contains information
268+
/// about the run. It's mutable because it contains
269+
/// the pointer to the output file (which needs to
270+
/// be mutable for the file to be written to)
271+
/// * `old_map` - The state group data originally in the database
272+
/// * `new_map` - The state group data generated by the compressor to
273+
/// replace the old contents
275274
fn output_sql(
276275
config: &mut Config,
277276
old_map: &BTreeMap<i64, StateGroupEntry>,
@@ -353,6 +352,20 @@ fn output_sql(
353352
pb.finish();
354353
}
355354

355+
/// Compares two sets of state groups
356+
///
357+
/// A state group entry contains a predecessor state group and a delta.
358+
/// The complete contents of a certain state group can be calculated by
359+
/// following this chain of predecessors back to some empty state and
360+
/// combining all the deltas together. This is called "collapsing".
361+
///
362+
/// This function confirms that two state group mappings lead to the
363+
/// exact same entries for each state group after collapsing them down.
364+
///
365+
/// # Arguments
366+
/// * `old_map` - The state group data currently in the database
367+
/// * `new_map` - The state group data that the old_map is being compared
368+
/// to
356369
fn check_that_maps_match(
357370
old_map: &BTreeMap<i64, StateGroupEntry>,
358371
new_map: &BTreeMap<i64, StateGroupEntry>,
@@ -391,3 +404,30 @@ fn check_that_maps_match(
391404

392405
println!("New state map matches old one");
393406
}
407+
408+
/// Gets the full state for a given group from the map (of deltas)
409+
fn collapse_state_maps(map: &BTreeMap<i64, StateGroupEntry>, state_group: i64) -> StateMap<Atom> {
410+
let mut entry = &map[&state_group];
411+
let mut state_map = StateMap::new();
412+
413+
let mut stack = vec![state_group];
414+
415+
while let Some(prev_state_group) = entry.prev_state_group {
416+
stack.push(prev_state_group);
417+
if !map.contains_key(&prev_state_group) {
418+
panic!("Missing {}", prev_state_group);
419+
}
420+
entry = &map[&prev_state_group];
421+
}
422+
423+
for sg in stack.iter().rev() {
424+
state_map.extend(
425+
map[&sg]
426+
.state_map
427+
.iter()
428+
.map(|((t, s), e)| ((t, s), e.clone())),
429+
);
430+
}
431+
432+
state_map
433+
}

0 commit comments

Comments
 (0)