Skip to content

Commit 3290726

Browse files
authored
Added option to output directed graphs of the group hierarchy before and after compression (#43)
It outputs node and edge information before and after the compressor has run - these can be visualised in a tool like Gephi. A good way to visualise what the compressor is actually doing!
1 parent becb293 commit 3290726

File tree

3 files changed

+98
-2
lines changed

3 files changed

+98
-2
lines changed

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,5 @@
22
**/*.rs.bk
33
*.data
44
*.old
5-
out.sql
5+
out.sql
6+
*.csv

src/graphing.rs

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
use std::collections::BTreeMap;
2+
use std::{fs::File, io::Write};
3+
4+
use super::StateGroupEntry;
5+
6+
type Graph = BTreeMap<i64, StateGroupEntry>;
7+
8+
/// Outputs information from a state group graph into an edges file and a node file
9+
///
10+
/// These can be loaded into something like Gephi to visualise the graphs
11+
///
12+
/// # Arguments
13+
///
14+
/// * `groups` - A map from state group ids to StateGroupEntries
15+
/// * `edges_output` - The file to output the predecessor link information to
16+
/// * `nodes_output` - The file to output the state group information to
17+
fn output_csv(groups: &Graph, edges_output: &mut File, nodes_output: &mut File) {
18+
// The line A;B in the edges file means:
19+
// That state group A has predecessor B
20+
writeln!(edges_output, "Source;Target",).unwrap();
21+
22+
// The line A;B;C;"B" in the nodes file means:
23+
// The state group id is A
24+
// This state group has B rows in the state_groups_state table
25+
// If C is true then A has no predecessor
26+
writeln!(nodes_output, "Id;Rows;Root;Label",).unwrap();
27+
28+
for (source, entry) in groups {
29+
// If the group has a predecessor then write an edge in the edges file
30+
if let Some(target) = entry.prev_state_group {
31+
writeln!(edges_output, "{};{}", source, target,).unwrap();
32+
}
33+
34+
// Write the state group's information to the nodes file
35+
writeln!(
36+
nodes_output,
37+
"{};{};{};\"{}\"",
38+
source,
39+
entry.state_map.len(),
40+
entry.prev_state_group.is_none(),
41+
entry.state_map.len(),
42+
)
43+
.unwrap();
44+
}
45+
}
46+
47+
/// Outputs information from two state group graphs into files
48+
///
49+
/// These can be loaded into something like Gephi to visualise the graphs
50+
/// before and after the compressor is run
51+
///
52+
/// # Arguments
53+
///
54+
/// * `before` - A map from state group ids to StateGroupEntries
55+
/// the information from this map goes into before_edges.csv
56+
/// and before_nodes.csv
57+
/// * `after` - A map from state group ids to StateGroupEntries
58+
/// the information from this map goes into after_edges.csv
59+
/// and after_nodes.csv
60+
pub fn make_graphs(before: Graph, after: Graph) {
61+
// Open all the files to output to
62+
let mut before_edges_file = File::create("before_edges.csv").unwrap();
63+
let mut before_nodes_file = File::create("before_nodes.csv").unwrap();
64+
let mut after_edges_file = File::create("after_edges.csv").unwrap();
65+
let mut after_nodes_file = File::create("after_nodes.csv").unwrap();
66+
67+
// Write before's information to before_edges and before_nodes
68+
output_csv(&before, &mut before_edges_file, &mut before_nodes_file);
69+
// Write after's information to after_edges and after_nodes
70+
output_csv(&after, &mut after_edges_file, &mut after_nodes_file);
71+
}

src/lib.rs

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,10 @@
1616
//! Synapse instance's database. Specifically, it aims to reduce the number of
1717
//! rows that a given room takes up in the `state_groups_state` table.
1818
19+
// This file handles the configuration options, which necessarily means lots
20+
// of arguments - this hopefully doesn't make the code unclear
21+
// #[allow(clippy::too_many_arguments)] is therefore used around some functions
22+
1923
use pyo3::{exceptions, prelude::*};
2024

2125
#[cfg(feature = "jemalloc")]
@@ -31,6 +35,7 @@ use string_cache::DefaultAtom as Atom;
3135

3236
mod compressor;
3337
mod database;
38+
mod graphing;
3439

3540
use compressor::Compressor;
3641
use database::PGEscape;
@@ -72,6 +77,7 @@ pub struct Config {
7277
min_saved_rows: Option<i32>,
7378
transactions: bool,
7479
level_sizes: LevelSizes,
80+
graphs: bool,
7581
}
7682

7783
impl Config {
@@ -137,6 +143,10 @@ impl Config {
137143
))
138144
.default_value("100,50,25")
139145
.takes_value(true),
146+
).arg(
147+
Arg::with_name("graphs")
148+
.short("g")
149+
.help("Whether to produce graphs of state groups before and after compression instead of SQL")
140150
).get_matches();
141151

142152
let db_url = matches
@@ -164,6 +174,8 @@ impl Config {
164174
let level_sizes = value_t!(matches, "level_sizes", LevelSizes)
165175
.unwrap_or_else(|e| panic!("Unable to parse level_sizes: {}", e));
166176

177+
let graphs = matches.is_present("graphs");
178+
167179
Config {
168180
db_url: String::from(db_url),
169181
output_file,
@@ -172,6 +184,7 @@ impl Config {
172184
min_saved_rows,
173185
transactions,
174186
level_sizes,
187+
graphs,
175188
}
176189
}
177190
}
@@ -260,6 +273,10 @@ pub fn run(mut config: Config) {
260273
// transaction.
261274

262275
output_sql(&mut config, &state_group_map, &new_state_group_map);
276+
277+
if config.graphs {
278+
graphing::make_graphs(state_group_map, new_state_group_map);
279+
}
263280
}
264281

265282
/// Produces SQL code to carry out changes and saves it to file
@@ -440,6 +457,7 @@ impl Config {
440457
/// Converts string and bool arguments into a Config struct
441458
///
442459
/// This function panics if db_url or room_id are empty strings!
460+
#[allow(clippy::too_many_arguments)]
443461
pub fn new(
444462
db_url: String,
445463
room_id: String,
@@ -448,6 +466,7 @@ impl Config {
448466
min_saved_rows: Option<i32>,
449467
transactions: bool,
450468
level_sizes: String,
469+
graphs: bool,
451470
) -> Result<Config, String> {
452471
let mut output: Option<File> = None;
453472
if let Some(file) = output_file {
@@ -471,6 +490,7 @@ impl Config {
471490
min_saved_rows,
472491
transactions,
473492
level_sizes,
493+
graphs,
474494
})
475495
}
476496
}
@@ -480,14 +500,16 @@ impl Config {
480500
/// Default arguments are equivalent to using the command line tool
481501
/// No defaults are provided for db_url or room_id since these arguments
482502
/// are compulsory (so that new() acts like parse_arguments())
503+
#[allow(clippy::too_many_arguments)]
483504
#[pyfunction(
484505
// db_url has no default
485506
// room_id has no default
486507
output_file = "None",
487508
max_state_group = "None",
488509
min_saved_rows = "None",
489510
transactions = false,
490-
level_sizes = "String::from(\"100,50,25\")"
511+
level_sizes = "String::from(\"100,50,25\")",
512+
graphs = false
491513
)]
492514
fn run_compression(
493515
db_url: String,
@@ -497,6 +519,7 @@ fn run_compression(
497519
min_saved_rows: Option<i32>,
498520
transactions: bool,
499521
level_sizes: String,
522+
graphs: bool,
500523
) -> PyResult<()> {
501524
let config = Config::new(
502525
db_url,
@@ -506,6 +529,7 @@ fn run_compression(
506529
min_saved_rows,
507530
transactions,
508531
level_sizes,
532+
graphs,
509533
);
510534
match config {
511535
Err(e) => Err(PyErr::new::<exceptions::PyException, _>(e)),

0 commit comments

Comments
 (0)