Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -153,4 +153,7 @@ dmypy.json
# Passwords for Mongo
pydatarecognition/secret_password.yml
pydatarecognition/secret_password2.yml
testing-cif-datarec-secret.json
testing-cif-datarec-secret.json

# pre-commit-hooks
.pre-commit-config.yaml
23 changes: 23 additions & 0 deletions news/fs_insert_one.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
**Added:**

* function for inserting one document to the filesystem database

**Changed:**

* <news item>

**Deprecated:**

* <news item>

**Removed:**

* <news item>

**Fixed:**

* <news item>

**Security:**

* <news item>
7 changes: 4 additions & 3 deletions pydatarecognition/cif_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,7 @@ def powdercif_to_json(po):

return json_object


def json_dump(json_object, output_path):
with open(output_path, 'w') as f:
json.dump(json_object, f)
Expand Down Expand Up @@ -342,6 +343,7 @@ def terminal_print(rank_doi_score_txt):
print('-' * 81)
return None


def print_story(user_input, args, ciffiles, skipped_cifs):
frame_dashchars = '-'*80
print(f'{frame_dashchars}\nInput data file: {user_input.name}\n'
Expand All @@ -354,7 +356,6 @@ def print_story(user_input, args, ciffiles, skipped_cifs):
print(f" {cif[0]} because {cif[1]}")
print(f'Done working with cifs.\n{frame_dashchars}\nGetting references...')


if __name__=="__main__":
import pathlib
toubling_path = pathlib.Path(os.path.join(os.pardir, 'docs/examples/cifs/measured/ps5069IIIsup4.rtv.combined.cif'))
json_dump(powdercif_to_json(cif_read(toubling_path)), pathlib.Path('../test1.json'))
pass
3 changes: 2 additions & 1 deletion pydatarecognition/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,8 @@ def open_dbs(rc, dbs=None):
if k in chained_db[base]:
chained_db[base][k].maps.append(v)
else:
chained_db[base][k] = ChainDB(v)
# chained_db[base][k] = ChainDB(v)
pass
client.chained_db = chained_db
return client

Expand Down
40 changes: 32 additions & 8 deletions pydatarecognition/fsclient.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ def load_json(filename):
lines = fh.readlines()
for line in lines:
doc = json.loads(line)
print(doc)
docs[doc["_id"]] = doc
return docs

Expand All @@ -79,6 +80,12 @@ def dump_json(filename, docs, date_handler=None):
fh.write(s)


def dump_json_test(filename, docs, date_handler=None):
with open(filename, 'w') as file:
file.seek(0)
json.dump(docs, file, default=date_handler)


def load_yaml(filename, return_inst=False, loader=None):
"""Loads a YAML file and returns a dict of its documents."""
if loader is None:
Expand Down Expand Up @@ -150,9 +157,6 @@ def load_json(self, db, dbpath):
for f in [
file
for file in iglob(os.path.join(dbpath, "*.json"))
if file not in db["blacklist"]
and len(db["whitelist"]) == 0
or os.path.basename(file).split(".")[0] in db["whitelist"]
]:
collfilename = os.path.split(f)[-1]
base, ext = os.path.splitext(collfilename)
Expand All @@ -166,9 +170,6 @@ def load_yaml(self, db, dbpath):
for f in [
file
for file in iglob(os.path.join(dbpath, "*.y*ml"))
if file not in db["blacklist"]
and len(db["whitelist"]) == 0
or os.path.basename(file).split(".")[0] in db["whitelist"]
]:
collfilename = os.path.split(f)[-1]
base, ext = os.path.splitext(collfilename)
Expand Down Expand Up @@ -241,8 +242,26 @@ def all_documents(self, collname, copy=True):

def insert_one(self, dbname, collname, doc):
"""Inserts one document to a database/collection."""
coll = self.dbs[dbname][collname]
coll[doc["_id"]] = doc
if not isinstance(doc, dict):
raise TypeError('Wrong document format bad_doc_format')
else:
if '_id' not in doc:
raise KeyError('Bad value in database entry key bad_entry_key')
else:
dbpath = dbpathname(dbname, self.rc)
for f in [
file
for file in iglob(os.path.join(dbpath, f"{collname}.json"))
]:
collfilename = os.path.split(f)[-1]
base, ext = os.path.splitext(collfilename)
self._collfiletypes[base] = "json"
with open(f, 'r+') as file:
file_data = json.load(file)
file_data[doc['_id']] = doc
file.seek(0)
# json.dump(file_data, file)
dump_json(file, file_data)

def insert_many(self, dbname, collname, docs):
"""Inserts many documents into a database/collection."""
Expand Down Expand Up @@ -274,3 +293,8 @@ def update_one(self, dbname, collname, filter, update, **kwargs):
newdoc = dict(filter if doc is None else doc)
newdoc.update(update)
coll[newdoc["_id"]] = newdoc


if __name__ == '__main__':
from tests.inputs.exemplars import EXEMPLARS
print(json.load(EXEMPLARS['calculated']))
5 changes: 3 additions & 2 deletions pydatarecognition/runcontrol.py
Original file line number Diff line number Diff line change
Expand Up @@ -306,5 +306,6 @@ def connect_db(rc, colls=None):
'''
with connect(rc, dbs=colls) as rc.client:
dbs = rc.client.dbs
chained_db = rc.client.chained_db
return chained_db, dbs
# chained_db = rc.client.chained_db
# return chained_db, dbs
return dbs
19 changes: 16 additions & 3 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@
from xonsh.lib import subprocess
from xonsh.lib.os import rmtree
from pydatarecognition.powdercif import storage, BUCKET_NAME
from pydatarecognition.fsclient import dump_yaml
from pydatarecognition.fsclient import dump_json, dump_json_test
from pydatarecognition.runcontrol import DEFAULT_RC
from tests.inputs.pydr_rc import pydr_rc
from tests.inputs.exemplars import EXEMPLARS
from google.cloud.exceptions import Conflict
from copy import deepcopy
Expand All @@ -22,6 +24,16 @@
CIFJSON_COLLECTION_NAME = "cif_json"


@pytest.fixture(scope="session")
def rc(make_db):
rc = DEFAULT_RC
db_path = make_db
pydr_rc['databases'][0]['url'] = db_path
rc._update(pydr_rc)

return rc


@pytest.fixture(scope="function")
def cif_mongodb_client_populated():
yield from cif_mongodb_client(True)
Expand All @@ -40,7 +52,8 @@ def make_db():
"""
cwd = os.getcwd()
name = "pydr_fake"
repo = os.path.join(tempfile.gettempdir(), name)
# repo = os.path.join(tempfile.gettempdir(), name)
repo = os.path.join(cwd, name)
if os.path.exists(repo):
rmtree(repo)
os.mkdir(repo)
Expand Down Expand Up @@ -229,7 +242,7 @@ def example_cifs_to_fs(fspath, collection_list=None):
d = {dd["_id"]: dd for dd in example}
else:
d = {example["_id"]: example}
dump_yaml("{}.yaml".format(coll), d)
dump_json_test("{}.json".format(coll), d)
os.chdir(cwd)


Expand Down
12 changes: 12 additions & 0 deletions tests/inputs/pydr_rc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
pydr_rc = {
"groupname": "Billinge Group",
"databases": [
{
"name": "local",
"url": ".",
"public": False,
"path": "db",
"local": True
}
]
}
83 changes: 59 additions & 24 deletions tests/test_fsclient.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,15 @@
from collections import defaultdict
from pathlib import Path
from testfixtures import TempDirectory

import pytest
import os
import json

from pydatarecognition.fsclient import FileSystemClient
from pydatarecognition.runcontrol import connect_db
from tests.inputs.pydr_rc import pydr_rc
from tests.inputs.exemplars import EXEMPLARS

#
# def test_dump_json():
Expand All @@ -18,48 +25,40 @@
# actual = f.read()
# assert actual == json_doc

# todo:
# build a runcontrol object as in regolith. have it created globally in the
# tests for reuse in all the tests (look for DEFAULT_RC in regoith tests)
# for now:
# DEFAULT_RC = RunControl(
# _validators=DEFAULT_VALIDATORS,
# builddir="_build",
# mongodbpath=property(lambda self: os.path.join(self.builddir, "_dbpath")),
# user_config=os.path.expanduser("~/.config/regolith/user.json"),
# force=False,
# database=None
# )
DEFAULT_RC = {}
rc = DEFAULT_RC


# FileSystemClient methods tested here
def test_is_alive():
def test_is_alive(rc):
expected = True # filesystem is always alive!
fsc = FileSystemClient(rc)
actual = fsc.is_alive()

assert actual == expected


def test_open():
def test_open(rc):
fsc = FileSystemClient(rc)
fsc.open()

# assert fsc.dbs == rc.databases
actual = fsc.dbs
# expected = connect_db(rc)[1]
# assert actual == expected

assert isinstance(fsc.dbs, type(defaultdict(lambda: defaultdict(dict))))
assert isinstance(fsc.chained_db, type(dict()))
assert not fsc.closed


def test_close():
def test_close(rc):
fsc = FileSystemClient(rc)
assert fsc.open
# assert fsc.dbs == rc.databases

actual = fsc.dbs
# expected = connect_db(rc)[1]
# assert actual == expected

assert isinstance(fsc.dbs, type(defaultdict(lambda: defaultdict(dict))))

actual = fsc.close()
fsc.close()
assert fsc.dbs is None
assert fsc.closed

Expand Down Expand Up @@ -119,9 +118,45 @@ def test_all_documents():
pass


@pytest.mark.skip("Not written")
def test_insert_one():
pass
test_insert_json = [({'intensity': [], 'q': [], 'ttheta': [], 'wavelength': 0.111111, '_id': 'ts1129'},
{'intensity': [], 'q': [], 'ttheta': [], 'wavelength': 0.111111, '_id': 'ts1129'})]
@pytest.mark.parametrize('input, result', test_insert_json)
def test_insert_one(rc, input, result):
client = FileSystemClient(rc)
client.open()

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the code below seems to be testing connect_db, which should be tested in test_connect_db and not here. It is ok to use the function here to connect your db, but don't write tests for it here.

collname = 'calculated'

path = os.path.join(rc.databases[0]['url'] + '/db', f'{collname}.json')

len_bef = 0
len_after = 0

with open(path, 'r+') as file:
len_bef = len(json.load(file))

client.insert_one(rc.databases[0], collname, input)

with open(path, 'r+') as file:
len_after = len(json.load(file))

assert len_after == len_bef + 1


test_insert_json_bad = [{'bad_case_test_dict': 'bad'}, 'bad_case_test_str']
def test_insert_one_bad(rc):
client = FileSystemClient(rc)
client.open()

collname = 'calculated'

path = os.path.join(rc.databases[0]['url'] + '/db', f'{collname}.json')

with pytest.raises(KeyError, match=r"Bad value in database entry key bad_entry_key"):
client.insert_one(rc.databases[0], collname, test_insert_json_bad[0])

with pytest.raises(TypeError, match=r"Wrong document format bad_doc_format"):
client.insert_one(rc.databases[0], collname, test_insert_json_bad[1])


@pytest.mark.skip("Not written")
Expand Down
42 changes: 11 additions & 31 deletions tests/test_runcontrol.py
Original file line number Diff line number Diff line change
@@ -1,40 +1,20 @@
import copy
import json

import pytest
from testfixtures import TempDirectory
from pathlib import Path

from pydatarecognition.runcontrol import DEFAULT_RC, load_rcfile, filter_databases, \
connect_db
from pydatarecognition.runcontrol import filter_databases, connect_db
from pydatarecognition.database import connect


pydr_rc = b"""
{
"groupname": "Billinge Group",
"databases": [
{
"name": "calculated",
"url": ".",
"public": false,
"path": "db",
"local": true
}
]
}
"""
def test_connect_db():
rc = copy.copy(DEFAULT_RC)

with TempDirectory() as d:
temp_dir = Path(d.path)
d.write(f"pydr_rc.json",
pydr_rc)
rc._update(load_rcfile(temp_dir / "pydr_rc.json"))
filter_databases(rc)
with connect(rc) as rc.client:
expected_dbs = rc.client.dbs
expected_chdb = rc.client.chained_db
chained_db, dbs = connect_db(rc)
assert chained_db == expected_chdb
assert dbs == expected_dbs
@pytest.mark.skip
def test_connect_db(rc):
filter_databases(rc)
with connect(rc) as rc.client:
expected_dbs = rc.client.dbs
expected_chdb = rc.client.chained_db
dbs = connect_db(rc)
# assert chained_db == expected_chdb
assert dbs == expected_dbs