Skip to content

Command line interface

Adrian Viehweger edited this page Apr 3, 2017 · 18 revisions

Note: The following code is for reference only and is not intended as a structured tutorial.

# at place A, create some json
zoo init file.json  # validation checks on records at this stage
# more modifications
zoo commit -m 'new RNA virus assemblies from study'
dat share . # generates link asdew3es...

# in some faraway place B
dat clone asdew3es...
zoo add --db zika --cell new_study 
# rename
zoo drop --db zika --cell new_study --force
zoo add --db zika --cell renamed_study
# now we can do analyses, e.g. MSA against some flaviviruses
zoo commit -m 'new RNA viruses related to flavivirus'

# at some place C, somebody already cloned zoo B
zoo pull # new sequences now present
zoo status --db zika

# This is lucky because B got frustrated and deleted everything.
zoo destroy --db zika

Example:

zoo init --db zika --cell a zoo/data/cell_a.json
# Initializing data cell.
# Inserted 3 entries into cell "a".

zoo add --db zika --cell a --primkey genbank.a zoo/data/cell_b.json
# Loading data cell.
# Index created on field "genbank.a".
# 1 documents inserted in cell "a".
# 3 duplicates skipped.

zoo add --db zika --cell a cell_c.json
# Loading data cell.
# 2 documents inserted in cell "a".

zoo init --db zika --cell c zoo/data/cell_c_change.json
zoo commit --db zika --cell c --ksize 3,4,5 --n 5 cell_c_change_commit

# now pull these changes to cell "a"


zoo drop --db zika --cell a --force
# Dropped cell "a" from database "zika".

pull

zoo init --db virus --cell original virus.json
# Initializing data cell.
# 3 entries inserted into cell "original".
# Primary key assigned to field "_id".
zoo commit --db virus --cell original --ksize 3,4,5 --n 5 original

mkdir send
cp original.json send/
dat share send/
# Syncing Dat Archive: /Users/pi/tmp/send
# Link: dat://73401e1b931164763ecc5a04fad78e4788682677cefc718ebf49f6b4fe4dbad7

mkdir receive
dat clone dat://73401e1b931164763ecc5a04fad78e4788682677cefc718ebf49f6b4fe4dbad7 receive/
# Download Finished!
# Total size: 1 file (484 B)
ls receive
# original.json

from pymongo import MongoClient

c = MongoClient('localhost:27017')['virus']['original']
[i for i in c.find()]

# we did experiments to replace "N" in bunyavirus seq w/ nucleotides
c.update_one({'virus': 'bunya'}, {'$set': {'sequence': 'ACTACCTTATA'}})
zoo commit --db virus --cell original --ksize 3,4,5 --n 5 original

# before
cat original.json
{"_id": "89d96f57-63d1-4efc-9c5b-13af6473eaad", "alt_id": {"gb": "gb1"}, "md5": "e7b1f3d8199b4b7fd5d54af4a1afac37", "sequence": "ACTAACCTATA", "virus": "flavi"}
{"_id": "ecd51cba-ce85-4c48-b63d-40a29a1b6676", "alt_id": {"gb": "gb1"}, "md5": "87bb94d7795874f84ef7731a823be434", "sequence": "TTTAACCTATA", "virus": "corona"}
{"_id": "780ca018-6267-440d-86e0-56fe0c211d70", "alt_id": {"gb": "gb1"}, "md5": "4c720baa79bfaf58597660b9720cd5d8", "sequence": "ACTANNNNATA", "virus": "bunya"}

zoo commit --db virus --cell original --ksize 3,4,5 --n 5 original
# Dumping data cell.
# | 3 Elapsed Time: 0:00:00
# Done.

# after
cat original.json
{"_id": "89d96f57-63d1-4efc-9c5b-13af6473eaad", "alt_id": {"gb": "gb1"}, "md5": "e7b1f3d8199b4b7fd5d54af4a1afac37", "sequence": "ACTAACCTATA", "virus": "flavi"}
{"_id": "ecd51cba-ce85-4c48-b63d-40a29a1b6676", "alt_id": {"gb": "gb1"}, "md5": "87bb94d7795874f84ef7731a823be434", "sequence": "TTTAACCTATA", "virus": "corona"}
{"_id": "780ca018-6267-440d-86e0-56fe0c211d70", "alt_id": {"gb": "gb1"}, "md5": "e5a49f574d58bfc3d27fe2c93285a199", "sequence": "ACTACCTTATA", "virus": "bunya"}

zoo drop --db virus --cell original --force

dat is still buggy. Try without it and "simulate changed files".

zoo drop --db virus --cell original --force
zoo init --db virus --cell original virus.json
zoo commit --db virus --cell original --ksize 3,4,5 --n 5 original
from pymongo import MongoClient

c = MongoClient('localhost:27017')['virus']['original']
[i for i in c.find()]

# we did experiments to replace "N" in bunyavirus seq w/ nucleotides
c.update_one({'virus': 'bunya'}, {'$set': {'sequence': 'ACTACCTTATA'}})
zoo commit --db virus --cell original --ksize 3,4,5 --n 5 modified
zoo add --db virus --cell modified original.json
zoo pull --db virus --cell modified modified.json
# Updating cell's md5 hashes.
# / 0 Elapsed Time: 0:00:00
# 
# 2 entries unchanged.
# 1 entries replaced.
tail -n1 original.json
# {"_id": "c133bf2d-04b1-4c8e-910a-c41e5376bae5", "alt_id": {"gb": "gb1"}, "md5": "4c720baa79bfaf58597660b9720cd5d8", "sequence": "ACTANNNNATA", "virus": "bunya"}

tail -n1 modified.json
# {"_id": "c133bf2d-04b1-4c8e-910a-c41e5376bae5", "alt_id": {"gb": "gb1"}, "md5": "e5a49f574d58bfc3d27fe2c93285a199", "sequence": "ACTACCTTATA", "virus": "bunya"}

diff

zoo add --db diff --cell mock tests/cell_a.json
zoo diff --db diff --cell mock --out diff.json tests/cell_b.json
cat diff.json

minhash, SBT

$ zoo sbt_index --db ref --cell ref --ksize 16 --nsketch 1000 \
reference
Initialize SBT.
Compute minhash signatures for selected documents.
k-mer size: 16, sketch size: 1000
\ 9158 Elapsed Time: 0:01:45
Save SBT.
Done.
$ sourmash sbt_search --ksize 16 reference survey.fa.sig
# running sourmash subcommand: sbt_search
loaded query: survey.fa... (k=16, DNA)
0.11 0ef85591-d464-4953-915f-f673907b7e8e (Zika reference genome)

status

zoo status --db diff --cell mock --example
Clone this wiki locally