
Commit da83a34

Merge pull request #8 from moleculemaker/feature/lambert8/chemscraper-k8s-job
feat: proof-of-concept for ChemScraper job in Kubernetes
2 parents 9efcdba + 6e3043c commit da83a34

40 files changed, with 2787 additions and 0 deletions.

.github/workflows/docker.yml

Lines changed: 71 additions & 0 deletions
```yaml
name: Docker

# This will run when:
# - new code is pushed to main/develop, to push the tags
#   latest and develop
# - a pull request is created or updated, to make sure the
#   Dockerfile is still valid.
# To be able to push to dockerhub, this expects the following
# secrets to be set in the project:
# - DOCKERHUB_USERNAME : username that can push to the org
# - DOCKERHUB_PASSWORD : password associated with the username
on:
  push:
    branches:
      - main
      - develop

  pull_request:

  # Trigger the workflow on release activity
  release:
    # Only use the types keyword to narrow down the activity types that will trigger your workflow.
    types:
      - published
      - edited
      - created

# Certain actions will only run when this is the main repo.
env:
  MAIN_REPO: moleculemaker/chemscraper-helm-chart
  DOCKERHUB_ORG: moleculemaker

jobs:
  docker:
    runs-on: ubuntu-latest
    steps:
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v2

      - uses: actions/checkout@v3

      - name: Docker meta
        id: meta
        uses: docker/metadata-action@v4
        with:
          images: |
            moleculemaker/chemscraper-job
          tags: |
            # set latest tag for default branch
            type=raw,value=latest,enable={{is_default_branch}}
            type=ref,event=branch
            type=ref,event=pr
            type=ref,event=tag
            type=semver,pattern={{version}}
            type=semver,pattern={{major}}.{{minor}}

      - name: Login to DockerHub
        uses: docker/login-action@v2
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_PASSWORD }}

      - name: Build and push
        uses: docker/build-push-action@v3
        with:
          context: job
          platforms: linux/amd64
          push: true
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
```
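Going by docker/metadata-action's documented tag rules (illustrative, not taken from an actual workflow run): a push to the default branch should produce `moleculemaker/chemscraper-job:latest` plus the branch tag `:main`, a push to `develop` should produce `:develop`, a pull request `:pr-<number>`, and a published release tagged like `v1.2.3` should produce `:v1.2.3`, `:1.2.3`, and `:1.2`.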

job/.dockerignore

Lines changed: 16 additions & 0 deletions
```
# Git metadata
.git/
.gitignore
.github/

# Docker metadata
.env
docker-compose.yml
Dockerfile

# This stuff doesn't end up in the container
#chemscraper/
README.md
inputs/
outputs/
```

job/.env

Lines changed: 30 additions & 0 deletions
```
# mmli-backend passes our current JOB_ID as an environment variable
JOB_ID='examplejobid'

# Default log level - adjust for more/less verbose logs
# e.g. DEBUG, INFO, WARNING, ERROR
LOG_LEVEL='DEBUG'

# Path to input PDFs for RM+CS
# Files in this path will be automatically downloaded from MinIO before running the AlphaSynthesis job
CHEMSCRAPER_INPUT_FILE='/usr/app/inputs/or100.09.tables.small.pdf'

# Path to output from full RM+CS workflow
# We store to the same path as ReactionMiner outputs, so that this is also uploaded to MinIO
CHEMSCRAPER_OUTPUT_DIR='/usr/app/outputs'

# Base URL to ChemScraper API
# We will override this default in production

# External
CHEMSCRAPER_BASE_URL='https://chemscraper.backend.staging.mmli1.ncsa.illinois.edu' # Staging
#CHEMSCRAPER_BASE_URL='https://chemscraper.backend.mmli1.ncsa.illinois.edu' # Prod


# Internal
#CHEMSCRAPER_BASE_URL='http://chemscraper-services-staging.staging.svc.cluster.local:8000' # Staging
#CHEMSCRAPER_BASE_URL='http://chemscraper-services.alphasynthesis.svc.cluster.local:8000' # Prod


# Local
#CHEMSCRAPER_BASE_URL='http://host.docker.internal:8000'
```
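Since these settings are plain environment variables, the job script only needs `os.environ` to pick them up. The snippet below is a hedged sketch (not part of this diff); its fallback values simply repeat the defaults from `job/.env`:

```python
import os

# Minimal config-loading sketch; defaults mirror job/.env. In Kubernetes these
# variables are expected to be injected by mmli-backend / the Job spec instead.
JOB_ID = os.environ.get("JOB_ID", "examplejobid")
LOG_LEVEL = os.environ.get("LOG_LEVEL", "DEBUG")
CHEMSCRAPER_INPUT_FILE = os.environ.get(
    "CHEMSCRAPER_INPUT_FILE", "/usr/app/inputs/or100.09.tables.small.pdf")
CHEMSCRAPER_OUTPUT_DIR = os.environ.get("CHEMSCRAPER_OUTPUT_DIR", "/usr/app/outputs")
CHEMSCRAPER_BASE_URL = os.environ.get(
    "CHEMSCRAPER_BASE_URL",
    "https://chemscraper.backend.staging.mmli1.ncsa.illinois.edu")
```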

job/.gitignore

Lines changed: 3 additions & 0 deletions
```
.idea/
**/__pycache__/**/*.pyc
outputs/*
```

job/Dockerfile

Lines changed: 19 additions & 0 deletions
```dockerfile
FROM mambaorg/micromamba:alpine
WORKDIR /usr/app/

# Use a drop-in for conda executable
ENV CONDA='micromamba'

# Install dependencies using conda
COPY environment.yml .
RUN ${CONDA} env update -n base -f environment.yml && ${CONDA} clean --all
SHELL [ "${CONDA}", "run", "-n", "base" ]

# Generate ChemScraper API Client
# FIXME: We needed to manually change one line of the generated code
#COPY generate-client.sh .
#RUN ./generate-client.sh

# Entrypoint script
COPY run_chemscraper.py .
CMD [ "python", "./run_chemscraper.py" ]
```

job/README.md

Lines changed: 8 additions & 0 deletions
## ChemScraper Docker Job
A slim container designed to submit a single PDF to ChemScraper

### Usage


### Building

job/chemscraper/.gitignore

Lines changed: 23 additions & 0 deletions
```
__pycache__/
build/
dist/
*.egg-info/
.pytest_cache/

# pyenv
.python-version

# Environments
.env
.venv

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# JetBrains
.idea/

/coverage.xml
/.coverage
```

job/chemscraper/README.md

Lines changed: 124 additions & 0 deletions
# fast-api-client
A client library for accessing FastAPI

## Usage
First, create a client:

```python
from fast_api_client import Client

client = Client(base_url="https://api.example.com")
```

If the endpoints you're going to hit require authentication, use `AuthenticatedClient` instead:

```python
from fast_api_client import AuthenticatedClient

client = AuthenticatedClient(base_url="https://api.example.com", token="SuperSecretToken")
```

Now call your endpoint and use your models:

```python
from fast_api_client.models import MyDataModel
from fast_api_client.api.my_tag import get_my_data_model
from fast_api_client.types import Response

with client as client:
    my_data: MyDataModel = get_my_data_model.sync(client=client)
    # or if you need more info (e.g. status_code)
    response: Response[MyDataModel] = get_my_data_model.sync_detailed(client=client)
```

Or do the same thing with an async version:

```python
from fast_api_client.models import MyDataModel
from fast_api_client.api.my_tag import get_my_data_model
from fast_api_client.types import Response

async with client as client:
    my_data: MyDataModel = await get_my_data_model.asyncio(client=client)
    response: Response[MyDataModel] = await get_my_data_model.asyncio_detailed(client=client)
```

By default, when you're calling an HTTPS API it will attempt to verify that SSL is working correctly. Using certificate verification is highly recommended most of the time, but sometimes you may need to authenticate to a server (especially an internal server) using a custom certificate bundle.

```python
client = AuthenticatedClient(
    base_url="https://internal_api.example.com",
    token="SuperSecretToken",
    verify_ssl="/path/to/certificate_bundle.pem",
)
```

You can also disable certificate validation altogether, but beware that **this is a security risk**.

```python
client = AuthenticatedClient(
    base_url="https://internal_api.example.com",
    token="SuperSecretToken",
    verify_ssl=False
)
```

Things to know:
1. Every path/method combo becomes a Python module with four functions:
    1. `sync`: Blocking request that returns parsed data (if successful) or `None`
    1. `sync_detailed`: Blocking request that always returns a `Response`, optionally with `parsed` set if the request was successful.
    1. `asyncio`: Like `sync` but async instead of blocking
    1. `asyncio_detailed`: Like `sync_detailed` but async instead of blocking

1. All path/query params, and bodies become method arguments.
1. If your endpoint had any tags on it, the first tag will be used as a module name for the function (my_tag above)
1. Any endpoint which did not have a tag will be in `fast_api_client.api.default`

## Advanced customizations

There are more settings on the generated `Client` class which let you control more runtime behavior; check out the docstring on that class for more info. You can also customize the underlying `httpx.Client` or `httpx.AsyncClient` (depending on your use-case):

```python
from fast_api_client import Client

def log_request(request):
    print(f"Request event hook: {request.method} {request.url} - Waiting for response")

def log_response(response):
    request = response.request
    print(f"Response event hook: {request.method} {request.url} - Status {response.status_code}")

client = Client(
    base_url="https://api.example.com",
    httpx_args={"event_hooks": {"request": [log_request], "response": [log_response]}},
)

# Or get the underlying httpx client to modify directly with client.get_httpx_client() or client.get_async_httpx_client()
```

You can even set the httpx client directly, but beware that this will override any existing settings (e.g., base_url):

```python
import httpx
from fast_api_client import Client

client = Client(
    base_url="https://api.example.com",
)
# Note that base_url needs to be re-set, as would any shared cookies, headers, etc.
client.set_httpx_client(httpx.Client(base_url="https://api.example.com", proxies="http://localhost:8030"))
```

## Building / publishing this package
This project uses [Poetry](https://python-poetry.org/) to manage dependencies and packaging. Here are the basics:
1. Update the metadata in pyproject.toml (e.g. authors, version)
1. If you're using a private repository, configure it with Poetry
    1. `poetry config repositories.<your-repository-name> <url-to-your-repository>`
    1. `poetry config http-basic.<your-repository-name> <username> <password>`
1. Publish the client with `poetry publish --build -r <your-repository-name>` or, if for public PyPI, just `poetry publish --build`

If you want to install this client into another project without publishing it (e.g. for development) then:
1. If that project **is using Poetry**, you can simply do `poetry add <path-to-this-client>` from that project
1. If that project is not using Poetry:
    1. Build a wheel with `poetry build -f wheel`
    1. Install that wheel from the other project `pip install <path-to-wheel>`

Lines changed: 8 additions & 0 deletions
```python
"""A client library for accessing FastAPI"""

from .client import AuthenticatedClient, Client

__all__ = (
    "AuthenticatedClient",
    "Client",
)
```
Lines changed: 1 addition & 0 deletions
```python
"""Contains methods for accessing the API"""
```
