
Commit fa1ce7d

Merge pull request #175 from VariantEffect/release-2024.0.0 (Release 2024.0.0)
2 parents: 89945b5 + 6940c1c

126 files changed: +11696 −4615 lines changed


.github/workflows/run-tests-on-push.yml

Lines changed: 13 additions & 0 deletions
@@ -42,3 +42,16 @@ jobs:
       - run: pip install .[dev,server]
       - run: pytest tests/
+
+  run-mypy-3_10:
+    runs-on: ubuntu-latest
+    name: MyPy checks on Python 3.10
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.10"
+          cache: 'pip'
+      - run: pip install --upgrade pip
+      - run: pip install .[dev,server]
+      - run: mypy src/
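The new CI job runs `mypy src/` against Python 3.10. As a minimal illustration of the kind of mismatch such a static check catches (a hypothetical function, not taken from the MaveDB codebase):

```python
def score_ratio(numerator: float, denominator: float) -> float:
    """Return a simple ratio; mypy checks callers against these annotations."""
    return numerator / denominator

# A well-typed call passes at runtime and under mypy:
print(score_ratio(3.0, 2.0))  # 1.5

# mypy (not the runtime) would reject score_ratio("3", 2.0) with an
# incompatible-type error before the code is ever executed.
```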

.gitignore

Lines changed: 0 additions & 4 deletions
@@ -94,10 +94,6 @@ ipython_config.py
 # PEP 582; used by e.g. github.com/David-OConnor/pyflow
 __pypackages__/

-# Celery stuff
-celerybeat-schedule
-celerybeat.pid
-
 # SageMath parsed files
 *.sage.py


Dockerfile

Lines changed: 2 additions & 2 deletions
@@ -3,7 +3,7 @@ FROM python:3.9 AS downloader
 WORKDIR /data

 # Install tools necessary used to install samtools and htslib so we can configure fasta files for genomic assembly.
-RUN apt-get update && apt-get install -y \
+RUN apt-get clean && apt-get update && apt-get install -y \
     build-essential \
     curl \
     git \
@@ -27,7 +27,7 @@ RUN curl -L https://github.com/samtools/htslib/releases/download/${htsversion}/h
     curl -L https://github.com/samtools/bcftools/releases/download/${htsversion}/bcftools-${htsversion}.tar.bz2 | tar xj && \
     (cd bcftools-${htsversion} && ./configure --enable-libgsl --enable-perl-filters --with-htslib=system && make install)

-# Fetch and index GRCh37 and GRCh38 assemblies. These will augment seqrepo transcript sequences.
+# Fetch and index GRCh37 and GRCh38 assemblies.
 RUN wget -O - https://ftp.ncbi.nlm.nih.gov/genomes/refseq/vertebrate_mammalian/Homo_sapiens/all_assembly_versions/GCF_000001405.25_GRCh37.p13/GCF_000001405.25_GRCh37.p13_genomic.fna.gz | gzip -d | bgzip > GCF_000001405.25_GRCh37.p13_genomic.fna.gz
 RUN wget -O - https://ftp.ncbi.nlm.nih.gov/genomes/refseq/vertebrate_mammalian/Homo_sapiens/all_assembly_versions/GCF_000001405.39_GRCh38.p13/GCF_000001405.39_GRCh38.p13_genomic.fna.gz | gzip -d | bgzip > GCF_000001405.39_GRCh38.p13_genomic.fna.gz
 RUN samtools faidx GCF_000001405.25_GRCh37.p13_genomic.fna.gz

Dockerfile.test

Lines changed: 17 additions & 0 deletions
@@ -0,0 +1,17 @@
+FROM ubuntu:latest
+RUN apt-get update
+RUN DEBIAN_FRONTEND=noninteractive apt-get upgrade -y
+RUN DEBIAN_FRONTEND=noninteractive apt-get install -y python3 python3-pip python3-psycopg2 postgresql libpq-dev
+WORKDIR /code
+
+# Install Python packages.
+COPY LICENSE README.md pyproject.toml ./
+COPY src/ ./src/
+COPY tests/ ./tests/
+COPY mypy_stubs ./mypy_stubs/
+
+RUN pip install --no-cache-dir --upgrade pip
+RUN pip install --no-cache-dir --upgrade .[dev,server]
+RUN useradd testuser -d /code
+
+RUN --network=none su testuser -c pytest
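The `RUN --network=none` flag in this file requires BuildKit, so invoking the test image would look roughly like the following (the image tag is an arbitrary choice for illustration, not something defined in the repository):

```shell
# Build the test image. The build itself runs pytest as the unprivileged
# testuser with networking disabled, so a successful build means the
# test suite passed.
DOCKER_BUILDKIT=1 docker build -f Dockerfile.test -t mavedb-test .
```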

Dockerfile.worker

Lines changed: 51 additions & 0 deletions
@@ -0,0 +1,51 @@
+FROM python:3.9 AS downloader
+
+WORKDIR /data
+
+# Install tools necessary used to install samtools and htslib so we can configure fasta files for genomic assembly.
+RUN apt-get clean && apt-get update && apt-get install -y \
+    build-essential \
+    curl \
+    git \
+    libbz2-dev \
+    libcurl4-openssl-dev \
+    libgsl0-dev \
+    liblzma-dev \
+    libncurses5-dev \
+    libperl-dev \
+    libssl-dev \
+    zlib1g-dev \
+    && rm -rf /var/lib/apt/lists/*
+
+# Install samtools and htslib.
+ARG htsversion=1.19
+RUN curl -L https://github.com/samtools/htslib/releases/download/${htsversion}/htslib-${htsversion}.tar.bz2 | tar xj && \
+    (cd htslib-${htsversion} && ./configure --enable-plugins --with-plugin-path='$(libexecdir)/htslib:/usr/libexec/htslib' && make install) && \
+    ldconfig && \
+    curl -L https://github.com/samtools/samtools/releases/download/${htsversion}/samtools-${htsversion}.tar.bz2 | tar xj && \
+    (cd samtools-${htsversion} && ./configure --with-htslib=system && make install) && \
+    curl -L https://github.com/samtools/bcftools/releases/download/${htsversion}/bcftools-${htsversion}.tar.bz2 | tar xj && \
+    (cd bcftools-${htsversion} && ./configure --enable-libgsl --enable-perl-filters --with-htslib=system && make install)
+
+# Fetch and index GRCh37 and GRCh38 assemblies.
+RUN wget -O - https://ftp.ncbi.nlm.nih.gov/genomes/refseq/vertebrate_mammalian/Homo_sapiens/all_assembly_versions/GCF_000001405.25_GRCh37.p13/GCF_000001405.25_GRCh37.p13_genomic.fna.gz | gzip -d | bgzip > GCF_000001405.25_GRCh37.p13_genomic.fna.gz
+RUN wget -O - https://ftp.ncbi.nlm.nih.gov/genomes/refseq/vertebrate_mammalian/Homo_sapiens/all_assembly_versions/GCF_000001405.39_GRCh38.p13/GCF_000001405.39_GRCh38.p13_genomic.fna.gz | gzip -d | bgzip > GCF_000001405.39_GRCh38.p13_genomic.fna.gz
+RUN samtools faidx GCF_000001405.25_GRCh37.p13_genomic.fna.gz
+RUN samtools faidx GCF_000001405.39_GRCh38.p13_genomic.fna.gz
+
+FROM python:3.9
+COPY --from=downloader /data /data
+
+WORKDIR /code
+
+# Install the application dependencies.
+COPY ./requirements.txt /code/requirements.txt
+RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
+
+# Install the application code.
+COPY src /code/src
+COPY src/mavedb/server_main.py /code/main.py
+
+ENV PYTHONPATH "${PYTHONPATH}:/code/src"
+
+CMD ["arq", "mavedb.worker.WorkerSettings"]
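The `ENV PYTHONPATH "${PYTHONPATH}:/code/src"` line makes the copied sources importable without installing the package. A stdlib-only sketch of the same effect from inside Python:

```python
import sys

# Equivalent of extending PYTHONPATH: append the source directory so that
# imports resolve against /code/src (the path used in the Dockerfile above).
extra = "/code/src"
if extra not in sys.path:
    sys.path.append(extra)

print(extra in sys.path)  # True
```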

README.md

Lines changed: 19 additions & 23 deletions
@@ -1,8 +1,8 @@
 # mavedb-api

 API for MaveDB. MaveDB is a biological database for Multiplex Assays of Variant Effect (MAVE) datasets.
-The API powers the MaveDB website at [mavedb.org](https://www.mavedb.org) and can also be called separately (see
-instructions [below](#using-mavedb-api)).
+The API powers the MaveDB website at [mavedb.org](https://www.mavedb.org) and can also be called separately (see
+instructions [below](#using-mavedb-api)).


 For more information about MaveDB or to cite MaveDB please refer to the
@@ -44,56 +44,45 @@ The distribution can be uploaded to PyPI using [twine](https://twine.readthedocs
 For use as a server, this distribution includes an optional set of dependencies, which are only invoked if the package
 is installed with `pip install mavedb[server]`.

-### Running the API server in Docker on production and test systems
+### Running a local version of the API server

 First build the application's Docker image:
 ```
 docker build --tag mavedb-api/mavedb-api .
 ```
 Then start the application and its database:
 ```
-docker-compose -f docker-compose-prod.yml up -d
+docker-compose -f docker-compose-local.yml up -d
 ```
 Omit `-d` (daemon) if you want to run the application in your terminal session, for instance to see startup errors without having
 to inspect the Docker container's log.

 To stop the application when it is running as a daemon, run
 ```
-docker-compose -f docker-compose-prod.yml down
+docker-compose -f docker-compose-local.yml down
 ```

-`docker-compose-prod.yml` configures two containers: one for the API server and one for the PostgreSQL database. The
-The database stores data in a Docker volume named `mavedb-data`, which will persist after running `docker-compose down`.
+`docker-compose-local.yml` configures four containers: one for the API server, one for the PostgreSQL database, one for the
+worker node and one for the Redis cache which acts as the job queue for the worker node. The worker node stores data in a Docker
+volume named `mavedb-redis` and the database stores data in a Docker volume named `mavedb-data`. Both these volumes will persist
+after running `docker-compose down`.

 **Notes**
 1. The `mavedb-api` container requires the following environment variables, which are configured in
-   `docker-compose-prod.yml`:
+   `docker-compose-local.yml`:

    - DB_HOST
    - DB_PORT
    - DB_DATABASE_NAME
    - DB_USERNAME
    - DB_PASSWORD
    - NCBI_API_KEY
+   - REDIS_IP
+   - REDIS_PORT

    The database username and password should be edited for production deployments. `NCBI_API_KEY` will be removed in
    the future. **TODO** Move these to an .env file.

-2. In the procedure given above, we do not push the Docker image to a repository like Docker Hub; we simply build the
-   image on the machine where it will be used. But to deploy the API server on the AWS-hosted test site, first tag the
-   image appropriately and push it to Elastic Container Repository. (These commands require )
-   ```
-   export ECRPASSWORD=$(aws ecr get-login-password --region us-west-2 --profile mavedb-test)
-   echo $ECRPASSWORD | docker login --username AWS --password-stdin {aws_account_id}.dkr.ecr.us-west-2.amazonaws.com
-   docker tag mavedb-api:latest {aws_account_id}.dkr.ecr.us-west-2.amazonaws.com/mavedb-api
-   docker push {aws_account_id}.dkr.ecr.us-west-2.amazonaws.com/mavedb-api
-   ```
-   These commands presuppose that you have the [AWS CLI](https://aws.amazon.com/cli/) installed and have created a named
-   profile, `mavedb-test`, with your AWS credentials.
-
-   With the Docker image pushed to ECR, you can now deploy the application. **TODO** Add instructions if we want to
-   document this.
-
 ### Running the API server in Docker for development

 A similar procedure can be followed to run the API server in development mode on your local machine. There are a couple
@@ -134,3 +123,10 @@ Before using either of these methods, configure the environment variables descri

 If you use PyCharm, the first method can be used in a Python run configuration, but the second method supports PyCharm's
 FastAPI run configuration.
+
+### Running the API server for production
+
+We maintain deployment configuration options and steps within a [private repository](https://github.com/VariantEffect/mavedb-deployment) used for deploying this source code to
+the production MaveDB environment. The main difference between the production setup and these local setups is that
+the worker and api services are split into distinct environments, allowing them to scale up or down individually
+dependent on need.
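The README's notes flag a TODO about moving the container's environment variables into an .env file. A hypothetical sketch using the variable names from that list (every value here is a placeholder, not a real credential or default):

```shell
# Hypothetical .env for docker-compose-local.yml; replace every value.
DB_HOST=localhost
DB_PORT=5432
DB_DATABASE_NAME=mavedb
DB_USERNAME=mavedb_user
DB_PASSWORD=change-me
NCBI_API_KEY=your-ncbi-api-key
REDIS_IP=localhost
REDIS_PORT=6379
```

Compose can supply such a file to a container via its `env_file:` option, or use a file named `.env` in the project directory for variable interpolation.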

alembic/manual_migrations/README

Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
+This directory contains database migrations run manually for which there is no simple upgrade/downgrade path. They are not runnable as is and will need to be either manually added to an existing alembic migration or transformed into raw SQL statements and executed directly.

0 commit comments
