diff --git a/.gitignore b/.gitignore index 40b9f26..8ca7b2a 100644 --- a/.gitignore +++ b/.gitignore @@ -141,4 +141,9 @@ dmypy.json # Passwords for Mongo pydatarecognition/secret_password.yml pydatarecognition/secret_password2.yml -testing-cif-datarec-secret.json \ No newline at end of file +testing-cif-datarec-secret.json +literature-powder-search-b8cdff9cda5f.json + +# test CIFs +/iucr_cif_remediated +literature-powder-search-firebase-adminsdk-3lqys-9ac4c7afdf.json \ No newline at end of file diff --git a/pydatarecognition/app.py b/pydatarecognition/app.py index 51066ca..60d7d41 100644 --- a/pydatarecognition/app.py +++ b/pydatarecognition/app.py @@ -115,13 +115,13 @@ async def footer_term(request: Request): } ) -@app.get('/login', tags=['authentication']) # Tag it as "authentication" for our docs +@app.get('/login', include_in_schema=False) async def login(request: Request): return templates.TemplateResponse('login.html', {"request": request, "user": None}) -@app.get('/google_login', tags=['authentication']) # Tag it as "authentication" for our docs +@app.get('/google_login', tags=['authentication']) async def google_login(request: Request): # Redirect Google OAuth back to our application redirect_uri = request.url_for('auth') @@ -162,7 +162,7 @@ async def get_documentation(request: Request): return response -@app.route('/cif_search', methods=['GET']) +@app.route('/cif_search', methods=['GET'], include_in_schema=False) @login_required async def cif_search(request: Request): """ @@ -180,7 +180,7 @@ async def cif_search(request: Request): }) -@app.post('/cif_search', tags=['Web Interface']) +@app.post('/cif_search', include_in_schema=False) async def upload_data_cif(request: Request, user_input: bytes = File(...), wavelength: str = Form(...), filter_key: str = Form(None), filter_value: str = Form(None), datatype: Literal["twotheta", "q"] = Form(...), user: Optional[dict] = Depends(get_user)): diff --git a/pydatarecognition/cif_io.py b/pydatarecognition/cif_io.py index 352a80c..ae59002 100644 --- a/pydatarecognition/cif_io.py +++ b/pydatarecognition/cif_io.py @@ -10,6 +10,10 @@ DEG = "deg" +class CifReadError(ValueError): + pass + + def cif_read(cif_file_path): ''' given a cif file-path, reads the cif and returns the cif data @@ -41,9 +45,17 @@ def cif_read(cif_file_path): else: print("Getting from Cif File") cifdata = CifFile.ReadCif(_fixIfWindowsPath(str(cif_file_path))) - cif_twotheta = np.char.split(cifdata[cifdata.keys()[0]]['_pd_proc_2theta_corrected'], '(') + unprocessed_cif_twotheta = cifdata[cifdata.keys()[0]].get('_pd_proc_2theta_corrected') + if unprocessed_cif_twotheta is None: + print("cif file did not contain _pd_proc_2theta_corrected") + raise CifReadError + cif_twotheta = np.char.split(unprocessed_cif_twotheta, '(') cif_twotheta = np.array([float(e[0]) for e in cif_twotheta]) - cif_intensity = np.char.split(cifdata[cifdata.keys()[0]]['_pd_proc_intensity_total'], '(') + unprocessed_cif_intensity = cifdata[cifdata.keys()[0]].get('_pd_proc_intensity_total') + if unprocessed_cif_intensity is None: + print("cif file did not contain _pd_proc_intensity_total") + raise CifReadError + cif_intensity = np.char.split(unprocessed_cif_intensity, '(') cif_intensity = np.array([float(e[0]) for e in cif_intensity]) for key in cifdata.keys(): wavelength_kwargs = {} @@ -51,6 +63,7 @@ def cif_read(cif_file_path): cif_wavelength = cifdata[key].get('_diffrn_radiation_wavelength') if isinstance(cif_wavelength, list): wavelength_kwargs['wavelength'] = float(cif_wavelength[0]) # FIXME Handle lists + # FIXME stopped on file sk3312Isup2.rtv.combined.cif in iucr_cif_remediated because of could not convert string to float: '0.6940(10)' in the above line wavelength_kwargs['wavel_units'] = "ang" break # FIXME Don't just go with first instance of wavelength. elif isinstance(cif_wavelength, str): @@ -62,7 +75,7 @@ def cif_read(cif_file_path): if not cif_wavelength: wavelength_kwargs['wavelength'] = None po = PydanticPowderCif(cif_file_path.stem[0:6], - DEG, cif_twotheta, cif_intensity, cif_file_path=cif_file_path.stem, + DEG, cif_twotheta, cif_intensity, cif_file_name=cif_file_path.stem, **wavelength_kwargs ) #TODO serialize all as json rather than npy save and see if how the cache speed compares diff --git a/pydatarecognition/mongo_utils.py b/pydatarecognition/mongo_utils.py index 68c63ac..be532fc 100644 --- a/pydatarecognition/mongo_utils.py +++ b/pydatarecognition/mongo_utils.py @@ -1,7 +1,7 @@ from pathlib import Path import json -from pydatarecognition.cif_io import cif_read +from pydatarecognition.cif_io import cif_read, CifReadError from pymongo import MongoClient @@ -36,24 +36,51 @@ def cifs_to_mongo(mongo_db_uri: str, mongo_db_name: str, mongo_collection_name: from google.cloud import storage from google.cloud.exceptions import Conflict + import firebase_admin + from firebase_admin import credentials, firestore + filepath = Path(os.path.abspath(__file__)) - if os.path.isfile(os.path.join(filepath.parent.absolute(), '../requirements/testing-cif-datarec-secret.json')): - os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = os.path.join(filepath.parent.absolute(), - '../requirements/testing-cif-datarec-secret.json') + # ensure that the bucket that cif_read will be dumping data into exists storage_client = storage.Client() try: - storage_client.create_bucket('raw_cif_data') + storage_client.create_bucket('cif_data') except Conflict: pass - CIF_DIR = filepath.parent.parent / 'docs' / 'examples' / 'cifs' - with open('secret_password.yml', 'r') as f: - secret_dict = yaml.safe_load(f) + + # testing out firebase + cred = credentials.Certificate(os.path.join(filepath.parent.absolute(), "../requirements/literature-powder-search-firebase-adminsdk-3lqys-9ac4c7afdf.json")) + firebase_app = firebase_admin.initialize_app(cred, {'databaseURL': "literature-powder-search"}) + firestore_db = firestore.client() + + cif_filepath = filepath.parent.parent / 'iucr_cif_remediated' + ciffiles = Path(cif_filepath).glob("*.cif") + for ciffile in ciffiles: + print(ciffile.name) + ciffile_path = Path(ciffile) + try: + pcd = cif_read(ciffile_path) + except CifReadError: + continue + dict = json.loads(pcd.json(by_alias=True)) + data_ref = firestore_db.collection('data').document() + data_ref.set(dict) + + # if os.path.isfile(os.path.join(filepath.parent.absolute(), '../requirements/testing-cif-datarec-secret.json')): + # os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = os.path.join(filepath.parent.absolute(), + # '../requirements/testing-cif-datarec-secret.json') + + + # CIF_DIR = filepath.parent.parent / 'docs' / 'examples' / 'cifs' + # with open('secret_password.yml', 'r') as f: + # secret_dict = yaml.safe_load(f) # URI for group DB f'mongodb+srv://{secret_dict["username"]}:{secret_dict["password"]}@cluster0.9bj1h.mongodb.net/?retryWrites=true&w=majority' # URI for zt altas db f'mongodb+srv://{secret_dict["username"]}:{secret_dict["password"]}@sidewinder.uc5ro.mongodb.net/?retryWrites=true&w=majority' - client = cifs_to_mongo(f'mongodb+srv://{secret_dict["username"]}:{secret_dict["password"]}@sidewinder.uc5ro.mongodb.net/?retryWrites=true&w=majority', "test", - "cif", CIF_DIR) - db = client["test"] - coll = db["cif"] - mongo_collections = list(coll.find({})) + + # commented out calls to + # client = cifs_to_mongo(f'mongodb+srv://{secret_dict["username"]}:{secret_dict["password"]}@sidewinder.uc5ro.mongodb.net/?retryWrites=true&w=majority', "test", + # "cif", CIF_DIR) + # db = client["test"] + # coll = db["cif"] + # mongo_collections = list(coll.find({})) pass diff --git a/pydatarecognition/powdercif.py b/pydatarecognition/powdercif.py index 3974e14..a9b2d14 100644 --- a/pydatarecognition/powdercif.py +++ b/pydatarecognition/powdercif.py @@ -17,16 +17,16 @@ MODEL_VERSION = '0.0.1' filepath = Path(os.path.abspath(__file__)) -if os.path.isfile(os.path.join(filepath.parent.absolute(), '../requirements/testing-cif-datarec-secret.json')): +if os.path.isfile(os.path.join(filepath.parent.absolute(), '../requirements/literature-powder-search-b8cdff9cda5f.json')): os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = os.path.join(filepath.parent.absolute(), - '../requirements/testing-cif-datarec-secret.json') + '../requirements/literature-powder-search-b8cdff9cda5f.json') elif os.path.isfile(os.path.join(filepath.parent.absolute(), '../requirements/production-cif-datarec-secret.json')): os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = os.path.join(filepath.parent.absolute(), '../requirements/production-cif-datarec-secret.json') else: print('Google API credentials not found. See README.md if you intend to read/write to the external database') -BUCKET_NAME = 'raw_cif_data' +BUCKET_NAME = 'cif_data' DAYS_CACHED = 5 GCS_METADATA = { 'pydantic_powder_model_version': MODEL_VERSION, diff --git a/tests/inputs/__init__.py b/tests/inputs/__init__.py new file mode 100644 index 0000000..e69de29