Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -141,4 +141,9 @@ dmypy.json
# Passwords for Mongo
pydatarecognition/secret_password.yml
pydatarecognition/secret_password2.yml
testing-cif-datarec-secret.json
testing-cif-datarec-secret.json
literature-powder-search-b8cdff9cda5f.json

# test CIFs
/iucr_cif_remediated
literature-powder-search-firebase-adminsdk-3lqys-9ac4c7afdf.json
8 changes: 4 additions & 4 deletions pydatarecognition/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,13 +115,13 @@ async def footer_term(request: Request):
}
)

@app.get('/login', tags=['authentication']) # Tag it as "authentication" for our docs
@app.get('/login', include_in_schema=False)
async def login(request: Request):

return templates.TemplateResponse('login.html', {"request": request, "user": None})


@app.get('/google_login', tags=['authentication']) # Tag it as "authentication" for our docs
@app.get('/google_login', tags=['authentication'])
async def google_login(request: Request):
# Redirect Google OAuth back to our application
redirect_uri = request.url_for('auth')
Expand Down Expand Up @@ -162,7 +162,7 @@ async def get_documentation(request: Request):
return response


@app.route('/cif_search', methods=['GET'])
@app.route('/cif_search', methods=['GET'], include_in_schema=False)
@login_required
async def cif_search(request: Request):
"""
Expand All @@ -180,7 +180,7 @@ async def cif_search(request: Request):
})


@app.post('/cif_search', tags=['Web Interface'])
@app.post('/cif_search', include_in_schema=False)
async def upload_data_cif(request: Request, user_input: bytes = File(...), wavelength: str = Form(...),
filter_key: str = Form(None), filter_value: str = Form(None),
datatype: Literal["twotheta", "q"] = Form(...), user: Optional[dict] = Depends(get_user)):
Expand Down
19 changes: 16 additions & 3 deletions pydatarecognition/cif_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@
DEG = "deg"


class CifReadError(ValueError):
    """Signal that a CIF file could not be turned into powder data.

    Raised by ``cif_read`` when the first data block of the CIF file does
    not contain a required item (``_pd_proc_2theta_corrected`` or
    ``_pd_proc_intensity_total``).  It subclasses ``ValueError`` so callers
    that already handle ``ValueError`` keep working, while bulk loaders can
    catch this type specifically and skip the offending file.
    """


def cif_read(cif_file_path):
'''
given a cif file-path, reads the cif and returns the cif data
Expand Down Expand Up @@ -41,16 +45,25 @@ def cif_read(cif_file_path):
else:
print("Getting from Cif File")
cifdata = CifFile.ReadCif(_fixIfWindowsPath(str(cif_file_path)))
cif_twotheta = np.char.split(cifdata[cifdata.keys()[0]]['_pd_proc_2theta_corrected'], '(')
unprocessed_cif_twotheta = cifdata[cifdata.keys()[0]].get('_pd_proc_2theta_corrected')
if unprocessed_cif_twotheta is None:
print("cif file did not contain _pd_proc_2theta_corrected")
raise CifReadError
cif_twotheta = np.char.split(unprocessed_cif_twotheta, '(')
cif_twotheta = np.array([float(e[0]) for e in cif_twotheta])
cif_intensity = np.char.split(cifdata[cifdata.keys()[0]]['_pd_proc_intensity_total'], '(')
unprocessed_cif_intensity = cifdata[cifdata.keys()[0]].get('_pd_proc_intensity_total')
if unprocessed_cif_intensity is None:
print("cif file did not contain _pd_proc_intensity_total")
raise CifReadError
cif_intensity = np.char.split(unprocessed_cif_intensity, '(')
cif_intensity = np.array([float(e[0]) for e in cif_intensity])
for key in cifdata.keys():
wavelength_kwargs = {}
#ZT Question: why isn't this _pd_proc_wavelength rather than _diffrn_radiation_wavelength?
cif_wavelength = cifdata[key].get('_diffrn_radiation_wavelength')
if isinstance(cif_wavelength, list):
wavelength_kwargs['wavelength'] = float(cif_wavelength[0]) # FIXME Handle lists
# FIXME: processing stopped on file sk3312Isup2.rtv.combined.cif in iucr_cif_remediated because the line above raised "could not convert string to float: '0.6940(10)'"
wavelength_kwargs['wavel_units'] = "ang"
break # FIXME Don't just go with first instance of wavelength.
elif isinstance(cif_wavelength, str):
Expand All @@ -62,7 +75,7 @@ def cif_read(cif_file_path):
if not cif_wavelength:
wavelength_kwargs['wavelength'] = None
po = PydanticPowderCif(cif_file_path.stem[0:6],
DEG, cif_twotheta, cif_intensity, cif_file_path=cif_file_path.stem,
DEG, cif_twotheta, cif_intensity, cif_file_name=cif_file_path.stem,
**wavelength_kwargs
)
# TODO: serialize everything as JSON rather than saving as npy, and see how the cache speed compares
Expand Down
53 changes: 40 additions & 13 deletions pydatarecognition/mongo_utils.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from pathlib import Path
import json

from pydatarecognition.cif_io import cif_read
from pydatarecognition.cif_io import cif_read, CifReadError
from pymongo import MongoClient


Expand Down Expand Up @@ -36,24 +36,51 @@ def cifs_to_mongo(mongo_db_uri: str, mongo_db_name: str, mongo_collection_name:
from google.cloud import storage
from google.cloud.exceptions import Conflict

import firebase_admin
from firebase_admin import credentials, firestore

filepath = Path(os.path.abspath(__file__))

if os.path.isfile(os.path.join(filepath.parent.absolute(), '../requirements/testing-cif-datarec-secret.json')):
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = os.path.join(filepath.parent.absolute(),
'../requirements/testing-cif-datarec-secret.json')
# ensure that the bucket that cif_read will be dumping data into exists
storage_client = storage.Client()
try:
storage_client.create_bucket('raw_cif_data')
storage_client.create_bucket('cif_data')
except Conflict:
pass
CIF_DIR = filepath.parent.parent / 'docs' / 'examples' / 'cifs'
with open('secret_password.yml', 'r') as f:
secret_dict = yaml.safe_load(f)

# testing out firebase
cred = credentials.Certificate(os.path.join(filepath.parent.absolute(), "../requirements/literature-powder-search-firebase-adminsdk-3lqys-9ac4c7afdf.json"))
firebase_app = firebase_admin.initialize_app(cred, {'databaseURL': "literature-powder-search"})
firestore_db = firestore.client()

cif_filepath = filepath.parent.parent / 'iucr_cif_remediated'
ciffiles = Path(cif_filepath).glob("*.cif")
for ciffile in ciffiles:
print(ciffile.name)
ciffile_path = Path(ciffile)
try:
pcd = cif_read(ciffile_path)
except CifReadError:
continue
dict = json.loads(pcd.json(by_alias=True))
data_ref = firestore_db.collection('data').document()
data_ref.set(dict)

# if os.path.isfile(os.path.join(filepath.parent.absolute(), '../requirements/testing-cif-datarec-secret.json')):
# os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = os.path.join(filepath.parent.absolute(),
# '../requirements/testing-cif-datarec-secret.json')


# CIF_DIR = filepath.parent.parent / 'docs' / 'examples' / 'cifs'
# with open('secret_password.yml', 'r') as f:
# secret_dict = yaml.safe_load(f)
# URI for group DB f'mongodb+srv://{secret_dict["username"]}:{secret_dict["password"]}@cluster0.9bj1h.mongodb.net/?retryWrites=true&w=majority'
# URI for ZT's Atlas DB: f'mongodb+srv://{secret_dict["username"]}:{secret_dict["password"]}@sidewinder.uc5ro.mongodb.net/?retryWrites=true&w=majority'
client = cifs_to_mongo(f'mongodb+srv://{secret_dict["username"]}:{secret_dict["password"]}@sidewinder.uc5ro.mongodb.net/?retryWrites=true&w=majority', "test",
"cif", CIF_DIR)
db = client["test"]
coll = db["cif"]
mongo_collections = list(coll.find({}))

# commented-out call to cifs_to_mongo and the follow-up query, kept for reference
# client = cifs_to_mongo(f'mongodb+srv://{secret_dict["username"]}:{secret_dict["password"]}@sidewinder.uc5ro.mongodb.net/?retryWrites=true&w=majority', "test",
# "cif", CIF_DIR)
# db = client["test"]
# coll = db["cif"]
# mongo_collections = list(coll.find({}))
pass
6 changes: 3 additions & 3 deletions pydatarecognition/powdercif.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,16 +17,16 @@
MODEL_VERSION = '0.0.1'

filepath = Path(os.path.abspath(__file__))
if os.path.isfile(os.path.join(filepath.parent.absolute(), '../requirements/testing-cif-datarec-secret.json')):
if os.path.isfile(os.path.join(filepath.parent.absolute(), '../requirements/literature-powder-search-b8cdff9cda5f.json')):
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = os.path.join(filepath.parent.absolute(),
'../requirements/testing-cif-datarec-secret.json')
'../requirements/literature-powder-search-b8cdff9cda5f.json')
elif os.path.isfile(os.path.join(filepath.parent.absolute(), '../requirements/production-cif-datarec-secret.json')):
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = os.path.join(filepath.parent.absolute(),
'../requirements/production-cif-datarec-secret.json')
else:
print('Google API credentials not found. See README.md if you intend to read/write to the external database')

BUCKET_NAME = 'raw_cif_data'
BUCKET_NAME = 'cif_data'
DAYS_CACHED = 5
GCS_METADATA = {
'pydantic_powder_model_version': MODEL_VERSION,
Expand Down
Empty file added tests/inputs/__init__.py
Empty file.