postgres-ai
diff --git a/‎README.md
Lines changed: 79 additions & 9 deletions b/‎README.md
Lines changed: 79 additions & 9 deletions
diff --git a/‎docker/Dockerfile
Lines changed: 53 additions & 0 deletions b/‎docker/Dockerfile
Lines changed: 53 additions & 0 deletions
diff --git a/‎docker/README.md
Lines changed: 7 additions & 0 deletions b/‎docker/README.md
Lines changed: 7 additions & 0 deletions
diff --git a/‎docker/postgresql_10_tweak.conf
Lines changed: 46 additions & 0 deletions b/‎docker/postgresql_10_tweak.conf
Lines changed: 46 additions & 0 deletions
diff --git a/‎docker_deprecated/postgresql.log.conf renamed to ‎docker/postgresql_9.6_tweak.conf
Lines changed: 14 additions & 0 deletions b/‎docker_deprecated/postgresql.log.conf renamed to ‎docker/postgresql_9.6_tweak.conf
Lines changed: 14 additions & 0 deletions
diff --git a/‎docker_deprecated/tmp/tsearch_data/russian.affix renamed to ‎docker/tsearch_data/russian.affix b/‎docker_deprecated/tmp/tsearch_data/russian.affix renamed to ‎docker/tsearch_data/russian.affix
diff --git a/‎docker_deprecated/tmp/tsearch_data/russian.dict renamed to ‎docker/tsearch_data/russian.dict b/‎docker_deprecated/tmp/tsearch_data/russian.dict renamed to ‎docker/tsearch_data/russian.dict
diff --git a/‎docker_deprecated/Dockerfile
Lines changed: 0 additions & 45 deletions b/‎docker_deprecated/Dockerfile
Lines changed: 0 additions & 45 deletions
diff --git a/‎docker_deprecated/README.md
Lines changed: 0 additions & 56 deletions b/‎docker_deprecated/README.md
Lines changed: 0 additions & 56 deletions
diff --git a/‎docker_deprecated/ec2_postgres_configs/10/i3.16xlarge
Lines changed: 0 additions & 28 deletions b/‎docker_deprecated/ec2_postgres_configs/10/i3.16xlarge
Lines changed: 0 additions & 28 deletions
@@ -7,30 +7,78 @@ Nancy helps to conduct automated database experiments.
 The Nancy Command Line Interface is a unified way to manage automated
 database experiments either in clouds or on-premise.
 
-Experiments are needed every time you:
+What is a Database Experiment?
+===
+A database experiment is a set of actions performed to test
+ * (a) specified SQL queries ("workload")
+ * (b) on specified machine / OS / Postgres version ("environment")
+ * (c) against specified database ("object")
+ * (d) with an optional change – some DDL or config change ("target" or "delta").
+
+Two main goals for any database experiment:
+ * (1) validation – check that the specified workload is valid,
+ * (2) benchmark – perform deep SQL query analysis.
+
+Database experiments are needed when you:
  - add or remove indexes;
- - want to verify query optimization ideas;
- - need to tune database parameters;
- - want to perform performance/stress test for your DB;
- - are preparing to upgrade your DBMS to the new major version;
+ - want to validate a new DB schema change and estimate migration time;
+ - want to verify some query optimization ideas;
+ - tune database configuration parameters;
+ - do capacity planning and want to stress-test your DB in some environment;
+ - plan to upgrade your DBMS to a new major version;
  - want to train ML model related to DB optimization.
 
-Currently Nancy works only with PostgreSQL versions 9.6 and 10.
+Currently Supported Features
+===
+* Experiments are conducted in a Docker container with extended Postgres setup
+* Supported Postgres versions: 9.6, 10
+* Supported locations for experimental runs:
+  * Any machine with Docker installed
+  * AWS EC2:
+    * Run on AWS EC2 Spot Instances (using Docker Machine)
+    * Allow specifying the EC2 instance type
+    * Auto-detect and use current lowest EC2 Spot Instance prices
+* Support local or remote (S3) files – config, dump, etc
+* What to test (a.k.a. "target" or "delta"):
+  * Test Postgres parameters change
+  * Test DDL change (specified as "do" SQL plus "undo" SQL that restores the initial state)
+* Supported types of workload:
+  * Use custom SQL as workload
+  * Use "real workload" prepared using Postgres logs
+* For "real workload", allow replaying it with increased speed
+* Allow keeping the container alive for a specified time after all steps are done
+* Collected artifacts:
+  * Workload SQL logs
+  * Deep SQL query analysis report
 
 Requirements
 ===
-To use Nancy CLI you need Linux or MacOS with installed Docker. If you plan 
-to run experiments in AWS EC2 instances, you also need Docker Machine
-(https://docs.docker.com/machine/).
+1) To use Nancy CLI you need Linux or MacOS with installed Docker.
+
+2) To run on AWS EC2 instances, you also need:
+  * AWS CLI https://aws.amazon.com/en/cli/
+  * Docker Machine https://docs.docker.com/machine/
+  * jq https://stedolan.github.io/jq/
+
 
 Installation
 ===
+
+In the minimal configuration, only two steps are needed:
+
+1) Install Docker (for Ubuntu/Debian: `sudo apt-get install docker.io`)
+2) Clone this repo and adjust `$PATH`:
 ```bash
 git clone https://github.com/startupturbo/nancy
 echo "export PATH=\$PATH:"$(pwd)"/nancy" >> ~/.bashrc
 source ~/.bashrc
 ```
 
+Additionally, to allow use of AWS EC2 instances:
+
+3) Install AWS CLI: follow instructions https://docs.aws.amazon.com/cli/latest/userguide/installing.html
+4) Install Docker Machine: follow instructions https://docs.docker.com/machine/install-machine/
+5) Install jq (for Ubuntu/Debian: `sudo apt-get install jq`)
+
 Getting started
 ===
 Start with these commands:
@@ -39,3 +87,25 @@ nancy help
 nancy run help
 ```
 
+"Hello World!"
+===
+```bash
+echo "create table hello_world as select i::int4 from generate_series(1, 1000000) _(i);" > ./sample.dump
+bzip2 ./sample.dump
+
+# "Clean run": w/o index
+# (seqscan is expected, total time ~150ms, depending on resources)
+nancy run \
+  --run-on localhost \
+  --workload-custom-sql "select count(1) from hello_world where i between 100000 and 100010;" \
+  --db-dump-path file://$(pwd)/sample.dump.bz2 --tmp-path /tmp
+
+# Now check how a regular btree index affects performance
+# (expected total time: ~0.05ms)
+nancy run \
+  --run-on localhost \
+  --workload-custom-sql "select count(1) from hello_world where i between 100000 and 100010;" \
+  --db-dump-path file://$(pwd)/sample.dump.bz2 --tmp-path /tmp \
+  --target-ddl-do "create index i_hello_world_i on hello_world(i);" \
+  --target-ddl-undo "drop index i_hello_world_i;"
+```
@@ -0,0 +1,53 @@
+FROM ubuntu:16.04
+
+ARG PG_SERVER_VERSION
+
+ENV PG_SERVER_VERSION=${PG_SERVER_VERSION:-10} \
+    DEBIAN_FRONTEND=noninteractive
+
+# add custom FTS dictionaries
+ADD ./tsearch_data /usr/share/postgresql/$PG_SERVER_VERSION/tsearch_data
+# logging ON; memory setting – for 2CPU/4096MB/SSD
+ADD ./postgresql_${PG_SERVER_VERSION}_tweak.conf /postgresql.tweak.conf
+
+# install Postgres and postgres-specific software:
+#   - desired version of Postgres server,
+#   - psql version 10
+#   - postgres_dba and pspg
+#   - pgbadger (modified, not lowercasing DB object names, auto_explain compatibility)
+RUN apt-key adv --keyserver hkp://p80.pool.sks-keyservers.net:80 --recv-keys B97B0AFCAA1A47F044F244A07FCC7D46ACCC4CF8 \
+      && echo "deb http://apt.postgresql.org/pub/repos/apt/ xenial-pgdg main"> /etc/apt/sources.list.d/pgdg.list \
+      && apt-get update && apt-get install -y sudo postgresql-$PG_SERVER_VERSION postgresql-contrib-$PG_SERVER_VERSION postgresql-plpython-$PG_SERVER_VERSION \
+      && apt-get install -y postgresql-$PG_SERVER_VERSION-plsh postgresql-server-dev-$PG_SERVER_VERSION postgresql-$PG_SERVER_VERSION-rum \
+      && apt-get install -y git postgresql-client-10 pspg pgreplay jq etcd libjson-xs-perl \
+      && perl -MCPAN -e'install Text::CSV_XS' \
+      && git clone https://github.com/NikolayS/postgres_dba.git /root/postgres_dba \
+      && git clone https://github.com/NikolayS/pgbadger.git /root/pgbadger
+
+# additionally, install newer NodeJS, npm, Sqitch, and more
+RUN wget -q -S -O - https://deb.nodesource.com/setup_8.x | sudo bash \
+      && apt-get install -y s3cmd sudo bzip2 python-software-properties software-properties-common \
+      && apt-get install -y build-essential cpanminus libdbd-pg-perl nginx netcat npm \
+      && npm install -g newman ava \
+      && sudo cpanm --quiet --notest App::Sqitch
+
+# Configure psql and Postgres, verify that Postgres starts and stops, and
+# generate the container start script /pg_start.sh:
+#   - ~/.psqlrc: define the "dba" psql variable (runs postgres_dba) and use pspg as pager
+#   - pg_hba.conf: trust for local socket connections, md5 for TCP connections from anywhere
+#   - postgresql.conf: listen on all addresses, set log_filename, append the tweak config
+#   - /pg_start.sh: start Postgres in the background, then run etcd in the foreground
+RUN echo "\\set dba '\\\\\\\\i /root/postgres_dba/start.psql'" >> ~/.psqlrc \
+      && echo "\\setenv PAGER 'pspg -bX --no-mouse'" >> ~/.psqlrc \
+      && echo "local   all all trust" > /etc/postgresql/$PG_SERVER_VERSION/main/pg_hba.conf \
+      && echo "host all  all    0.0.0.0/0  md5" >> /etc/postgresql/$PG_SERVER_VERSION/main/pg_hba.conf \
+      && echo "listen_addresses='*'" >> /etc/postgresql/$PG_SERVER_VERSION/main/postgresql.conf \
+      && echo "log_filename='postgresql-$PG_SERVER_VERSION-main.log'" >> /etc/postgresql/$PG_SERVER_VERSION/main/postgresql.conf \
+      && /etc/init.d/postgresql start && psql -U postgres -c 'create database test;' && /etc/init.d/postgresql stop \
+      && cat /postgresql.tweak.conf >> /etc/postgresql/$PG_SERVER_VERSION/main/postgresql.conf \
+      && echo "#!/bin/bash" > /pg_start.sh && chmod a+x /pg_start.sh \
+      && printf "sudo -u postgres /usr/lib/postgresql/$PG_SERVER_VERSION/bin/postgres -D /var/lib/postgresql/$PG_SERVER_VERSION/main -c config_file=/etc/postgresql/$PG_SERVER_VERSION/main/postgresql.conf & \n" >> /pg_start.sh \
+      && echo "etcd" >> /pg_start.sh
+
+EXPOSE 5432
+
+#VOLUME  ["/etc/postgresql", "/var/log/postgresql", "/var/lib/postgresql"]
+
+# etcd is not actually used yet (it's for future needs), but it runs in the foreground and keeps the container alive, which allows restarting Postgres without interrupting the container
+CMD ["/pg_start.sh"]
+
@@ -0,0 +1,7 @@
+How to build/rebuild:
+
+```bash
+docker build --build-arg PG_SERVER_VERSION=9.6 -t postgresmen/postgres-with-stuff:pg9.6 .
+docker login # you must be registered, go to hub.docker.com
+docker push postgresmen/postgres-with-stuff:pg9.6
+```
@@ -0,0 +1,46 @@
+# Assume we have machine with 2CPU/4096MB/SSD (CircleCI default)
+# IMPORTANT: on faster systems, you need to use your own memory-related settings!
+work_mem = 32MB # warning: tune it if you expect *many* concurrent connections
+shared_buffers = 3GB
+effective_cache_size = 1GB
+maintenance_work_mem = 512MB
+checkpoint_completion_target = 0.7
+wal_buffers = 16MB
+random_page_cost = 1.1
+effective_io_concurrency = 200
+# do not use parallel execution to avoid issues with analysis
+max_worker_processes = 0
+max_parallel_workers_per_gather = 0
+max_parallel_workers = 0
+
+log_destination = 'stderr,csvlog'
+logging_collector = on
+log_directory = '/var/log/postgresql'
+# log_filename – to be set dynamically
+log_min_messages = notice
+log_min_error_statement = notice
+log_min_duration_statement = -1 # rely on "auto_explain.log_min_duration = 0", avoid duplicates
+log_checkpoints = on
+log_connections = on
+log_disconnections = on
+log_line_prefix = '%t [%p]: [%l-1] db=%d,user=%u (%a,%h) '
+log_lock_waits = on
+log_replication_commands = on
+log_temp_files = 0
+log_autovacuum_min_duration = 0
+
+shared_preload_libraries = 'pg_stat_statements,auto_explain'
+
+pg_stat_statements.max = 5000
+pg_stat_statements.track = all
+pg_stat_statements.track_utility = on
+pg_stat_statements.save = on
+
+auto_explain.log_min_duration = 0
+auto_explain.log_analyze = on
+auto_explain.log_verbose = on
+auto_explain.log_buffers = on
+auto_explain.log_format = 'json'
+auto_explain.log_timing = on
+auto_explain.log_triggers = on
+auto_explain.log_nested_statements = on
@@ -1,3 +1,17 @@
+# Assume we have machine with 2CPU/4096MB/SSD (CircleCI default)
+# IMPORTANT: on faster systems, you need to use your own memory-related settings!
+work_mem = 32MB # warning: tune it if you expect *many* concurrent connections
+shared_buffers = 3GB
+effective_cache_size = 1GB
+maintenance_work_mem = 512MB
+checkpoint_completion_target = 0.7
+wal_buffers = 16MB
+random_page_cost = 1.1
+effective_io_concurrency = 200
+# do not use parallel execution to avoid issues with analysis
+max_worker_processes = 0
+max_parallel_workers_per_gather = 0
+
 log_destination = 'stderr,csvlog'
 logging_collector = on
 log_directory = '/var/log/postgresql'