-
Notifications
You must be signed in to change notification settings - Fork 2
Command line interface
Adrian Viehweger edited this page Mar 29, 2017
·
18 revisions
Note: The following code is for reference only, and not intended as a structured tutorial.
# at place A, create some json
zoo init file.json # validation checks on records at this stage
# more modifications
zoo commit -m 'new RNA virus assemblies from study'
dat share . # generates link asdew3es...
# in some faraway place B
dat clone asdew3es...
zoo add --db zika --cell new_study
# rename
zoo drop --db zika --cell new_study --force
zoo add --db zika --cell renamed_study
# now we can do analyses, e.g. MSA against some flaviviruses
zoo commit -m 'new RNA viruses related to flavivirus'
# at some place C, somebody already cloned zoo B
zoo pull # new sequences now present
zoo status --db zika
# This is lucky because B got frustrated and deleted everything.
zoo destroy --db zika
Example:
zoo init --db zika --cell a zoo/data/cell_a.json
# Initializing data cell.
# Inserted 3 entries into cell "a".
zoo add --db zika --cell a --primkey genbank.a zoo/data/cell_b.json
# Loading data cell.
# Index created on field "genbank.a".
# 1 documents inserted in cell "a".
# 3 duplicates skipped.
zoo add --db zika --cell a cell_c.json
# Loading data cell.
# 2 documents inserted in cell "a".
zoo init --db zika --cell c zoo/data/cell_c_change.json
zoo commit --db zika --cell c --ksize 3,4,5 --n 5 cell_c_change_commit
# now pull these changes to cell "a"
zoo drop --db zika --cell a --force
# Dropped cell "a" from database "zika".
pull
zoo init --db virus --cell original virus.json
# Initializing data cell.
# 3 entries inserted into cell "original".
# Primary key assigned to field "_id".
zoo commit --db virus --cell original --ksize 3,4,5 --n 5 original
mkdir send
cp original.json send/
dat share send/
# Syncing Dat Archive: /Users/pi/tmp/send
# Link: dat://73401e1b931164763ecc5a04fad78e4788682677cefc718ebf49f6b4fe4dbad7
mkdir receive
dat clone dat://73401e1b931164763ecc5a04fad78e4788682677cefc718ebf49f6b4fe4dbad7 receive/
# Download Finished!
# Total size: 1 file (484 B)
ls receive
# original.json
from pymongo import MongoClient
c = MongoClient('localhost:27017')['virus']['original']
[i for i in c.find()]
# we did experiments to replace "N" in bunyavirus seq w/ nucleotides
c.update_one({'virus': 'bunya'}, {'$set': {'sequence': 'ACTACCTTATA'}})
zoo commit --db virus --cell original --ksize 3,4,5 --n 5 original
# before
cat original.json
{"_id": "89d96f57-63d1-4efc-9c5b-13af6473eaad", "alt_id": {"gb": "gb1"}, "md5": "e7b1f3d8199b4b7fd5d54af4a1afac37", "sequence": "ACTAACCTATA", "virus": "flavi"}
{"_id": "ecd51cba-ce85-4c48-b63d-40a29a1b6676", "alt_id": {"gb": "gb1"}, "md5": "87bb94d7795874f84ef7731a823be434", "sequence": "TTTAACCTATA", "virus": "corona"}
{"_id": "780ca018-6267-440d-86e0-56fe0c211d70", "alt_id": {"gb": "gb1"}, "md5": "4c720baa79bfaf58597660b9720cd5d8", "sequence": "ACTANNNNATA", "virus": "bunya"}
zoo commit --db virus --cell original --ksize 3,4,5 --n 5 original
# Dumping data cell.
# | 3 Elapsed Time: 0:00:00
# Done.
# after
cat original.json
{"_id": "89d96f57-63d1-4efc-9c5b-13af6473eaad", "alt_id": {"gb": "gb1"}, "md5": "e7b1f3d8199b4b7fd5d54af4a1afac37", "sequence": "ACTAACCTATA", "virus": "flavi"}
{"_id": "ecd51cba-ce85-4c48-b63d-40a29a1b6676", "alt_id": {"gb": "gb1"}, "md5": "87bb94d7795874f84ef7731a823be434", "sequence": "TTTAACCTATA", "virus": "corona"}
{"_id": "780ca018-6267-440d-86e0-56fe0c211d70", "alt_id": {"gb": "gb1"}, "md5": "e5a49f574d58bfc3d27fe2c93285a199", "sequence": "ACTACCTTATA", "virus": "bunya"}
zoo drop --db virus --cell original --force
dat is still buggy. Try without it and "simulate changed files".
zoo drop --db virus --cell original --force
zoo init --db virus --cell original virus.json
zoo commit --db virus --cell original --ksize 3,4,5 --n 5 original
from pymongo import MongoClient
c = MongoClient('localhost:27017')['virus']['original']
[i for i in c.find()]
# we did experiments to replace "N" in bunyavirus seq w/ nucleotides
c.update_one({'virus': 'bunya'}, {'$set': {'sequence': 'ACTACCTTATA'}})
zoo commit --db virus --cell original --ksize 3,4,5 --n 5 modified
zoo add --db virus --cell modified original.json
zoo pull --db virus --cell modified modified.json
# Updating cell's md5 hashes.
# / 0 Elapsed Time: 0:00:00
#
# 2 entries unchanged.
# 1 entries replaced.
tail -n1 original.json
# {"_id": "c133bf2d-04b1-4c8e-910a-c41e5376bae5", "alt_id": {"gb": "gb1"}, "md5": "4c720baa79bfaf58597660b9720cd5d8", "sequence": "ACTANNNNATA", "virus": "bunya"}
tail -n1 modified.json
# {"_id": "c133bf2d-04b1-4c8e-910a-c41e5376bae5", "alt_id": {"gb": "gb1"}, "md5": "e5a49f574d58bfc3d27fe2c93285a199", "sequence": "ACTACCTTATA", "virus": "bunya"}