Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
ffb70ac
wip: pcgv2 l2cache ingest
akhileshh Sep 20, 2021
4633d7a
wip: pcgv2 ingest
akhileshh Sep 20, 2021
9d7910a
wip: pcgv2 l2cache ingest
akhileshh Sep 20, 2021
6d62080
wip: pcgv2 ingest
akhileshh Sep 20, 2021
3e2fc79
fix: filenames
akhileshh Feb 13, 2022
221c863
fix: remove cg dependency from core
akhileshh Feb 14, 2022
cbd483a
wip: pcgv2 ingest
akhileshh Sep 20, 2021
a9c4e8f
fix: remove cg dependency from core
akhileshh Feb 14, 2022
f651980
fix: use cv where possible
akhileshh Feb 26, 2022
062745e
fix: image and default client init
akhileshh Apr 4, 2022
79958d8
fix: client config
akhileshh Apr 4, 2022
041a445
fix: convert to utc
akhileshh Apr 6, 2022
e438fc8
fix: ignore missing config
akhileshh Apr 6, 2022
8cb0d1c
wip: pcgv2 ingest
akhileshh Sep 20, 2021
dd7fef0
fix: remove cg dependency from core
akhileshh Feb 14, 2022
0bddd7b
wip: pcgv2 ingest
akhileshh Sep 20, 2021
2294473
wip: pcgv2 ingest
akhileshh Sep 20, 2021
7c44e1e
fix: remove cg dependency from core
akhileshh Feb 14, 2022
2d0e95d
fix: use cv where possible
akhileshh Feb 26, 2022
e150316
fix: image and default client init
akhileshh Apr 4, 2022
ea369b6
fix: client config
akhileshh Apr 4, 2022
c849efe
fix: convert to utc
akhileshh Apr 6, 2022
ebf97ad
fix: update yamls, default q name
akhileshh Sep 15, 2022
7ad8e23
fix: improve cli
akhileshh Sep 15, 2022
04f08c1
fix: reduce queuing memory usage
akhileshh Sep 16, 2022
f513c65
fix: incorrect properties
akhileshh Sep 17, 2022
a6ebe50
fix: use fail_to_zero=True, proper job count
akhileshh Sep 17, 2022
a71b08d
update default queue length
akhileshh Feb 2, 2023
378f43b
update docker files, remove deprecated np.int
akhileshh Apr 21, 2024
e83cbf7
rebase; rename queue vars
akhileshh Aug 23, 2024
4138269
downgrade numpy
akhileshh Aug 23, 2024
0703be7
fix: pythonpath in docker; ignore missing supervoxels
akhileshh Aug 31, 2024
dda5398
migrate to the new kvdbclient
akhileshh Feb 28, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
124 changes: 124 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
.idea/*

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*,cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/

# Visual Studio Code
.vscode/

# terraform
.terraform/
*.lock.hcl
*.tfstate
*.tfstate.*


# local dev stuff
.devcontainer/
*.ipynb
*.rdb
/protobuf*

# Git
.git/
7 changes: 3 additions & 4 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
FROM tiangolo/uwsgi-nginx-flask:python3.7
ARG PYTHON_VERSION=3.11
FROM tiangolo/uwsgi-nginx-flask:python${PYTHON_VERSION}

ENV GIT_SSL_NO_VERIFY=1
RUN mkdir -p /home/nginx/.cloudvolume/secrets && chown -R nginx /home/nginx && usermod -d /home/nginx -s /bin/bash nginx

COPY . /app
RUN pip install pip==20.0.1 \
&& pip install --no-cache-dir --upgrade -r requirements.txt \
&& pip install --upgrade git+https://github.com/seung-lab/KVDbClient.git@main
RUN pip install --no-cache-dir --upgrade -r requirements.txt
10 changes: 5 additions & 5 deletions cloudbuild.v1.ingest.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,23 +7,23 @@ steps:
timeout: 600s
args:
- build
- "--tag=gcr.io/neuromancer-seung-import/pcgl2cache:ingest_v1_$TAG_NAME"
- "--file=./ingest.Dockerfile"
- "--tag=gcr.io/$PROJECT_ID/pcgl2cache:ingest_v1"
- "--file=./ingest.v1.Dockerfile"
- .
- name: "gcr.io/cloud-builders/docker"
entrypoint: "bash"
args:
[
"-c",
"docker tag gcr.io/$PROJECT_ID/pcgl2cache:ingest_v1_$TAG_NAME $$USERNAME/pcgl2cache:ingest_v1_$TAG_NAME",
"docker tag gcr.io/$PROJECT_ID/pcgl2cache:ingest_v1 $$USERNAME/pcgl2cache:ingest_v1",
]
secretEnv: ["USERNAME"]
- name: "gcr.io/cloud-builders/docker"
entrypoint: "bash"
args: ["-c", "docker push $$USERNAME/pcgl2cache:ingest_v1_$TAG_NAME"]
args: ["-c", "docker push $$USERNAME/pcgl2cache:ingest_v1"]
secretEnv: ["USERNAME"]
images:
- "gcr.io/neuromancer-seung-import/pcgl2cache:ingest_v1_$TAG_NAME"
- "gcr.io/$PROJECT_ID/pcgl2cache:ingest_v1"
availableSecrets:
secretManager:
- versionName: projects/$PROJECT_ID/secrets/docker-password/versions/1
Expand Down
6 changes: 3 additions & 3 deletions cloudbuild.v1.worker.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ steps:
timeout: 600s
args:
- build
- "--tag=gcr.io/neuromancer-seung-import/pcgl2cache:worker_pcgv1_$TAG_NAME"
- "--file=./ingest.Dockerfile"
- "--tag=gcr.io/$PROJECT_ID/pcgl2cache:worker_pcgv1_$TAG_NAME"
- "--file=./ingest.v1.Dockerfile"
- .
- name: "gcr.io/cloud-builders/docker"
entrypoint: "bash"
Expand All @@ -23,7 +23,7 @@ steps:
args: ["-c", "docker push $$USERNAME/pcgl2cache:worker_pcgv1_$TAG_NAME"]
secretEnv: ["USERNAME"]
images:
- "gcr.io/neuromancer-seung-import/pcgl2cache:worker_pcgv1_$TAG_NAME"
- "gcr.io/$PROJECT_ID/pcgl2cache:worker_pcgv1_$TAG_NAME"
availableSecrets:
secretManager:
- versionName: projects/$PROJECT_ID/secrets/docker-password/versions/1
Expand Down
32 changes: 32 additions & 0 deletions cloudbuild.v2.ingest.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Cloud Build pipeline for the PCGv2 ingest image: build it from
# ingest.v2.Dockerfile, then push a copy to the registry `docker login`
# authenticates against by default (Docker Hub).
steps:
  # Authenticate first. The password is piped through stdin rather than passed
  # as --password=..., so it never shows up in the process argument list and
  # Docker does not emit its "insecure" warning.
  # `$$` is Cloud Build's escape for a literal `$`, leaving the variable for bash.
  - name: "gcr.io/cloud-builders/docker"
    entrypoint: "bash"
    args:
      - "-c"
      - "printf '%s' \"$$PASSWORD\" | docker login --username=\"$$USERNAME\" --password-stdin"
    secretEnv: ["USERNAME", "PASSWORD"]
  # Build the image under the project's gcr.io name.
  - name: "gcr.io/cloud-builders/docker"
    timeout: 600s
    args:
      - build
      - "--tag=gcr.io/$PROJECT_ID/pcgl2cache:ingest_v2"
      - "--file=./ingest.v2.Dockerfile"
      - .
  # Re-tag the same image under the logged-in user's namespace.
  - name: "gcr.io/cloud-builders/docker"
    entrypoint: "bash"
    args:
      [
        "-c",
        "docker tag gcr.io/$PROJECT_ID/pcgl2cache:ingest_v2 $$USERNAME/pcgl2cache:ingest_v2",
      ]
    secretEnv: ["USERNAME"]
  # Push the re-tagged image.
  - name: "gcr.io/cloud-builders/docker"
    entrypoint: "bash"
    args: ["-c", "docker push $$USERNAME/pcgl2cache:ingest_v2"]
    secretEnv: ["USERNAME"]
# Images listed here are pushed by Cloud Build itself once all steps succeed.
images:
  - "gcr.io/$PROJECT_ID/pcgl2cache:ingest_v2"
# Secret Manager entries exposed to the steps above through `secretEnv`.
availableSecrets:
  secretManager:
    - versionName: projects/$PROJECT_ID/secrets/docker-password/versions/1
      env: "PASSWORD"
    - versionName: projects/$PROJECT_ID/secrets/docker-username/versions/1
      env: "USERNAME"
32 changes: 32 additions & 0 deletions cloudbuild.v2.worker.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Cloud Build pipeline for the PCGv2 worker image: build it from
# ingest.v2.Dockerfile, tag it with the triggering $TAG_NAME, then push a copy
# to the registry `docker login` authenticates against by default (Docker Hub).
steps:
  # Authenticate first. The password is piped through stdin rather than passed
  # as --password=..., so it never shows up in the process argument list and
  # Docker does not emit its "insecure" warning.
  # `$$` is Cloud Build's escape for a literal `$`, leaving the variable for bash.
  - name: "gcr.io/cloud-builders/docker"
    entrypoint: "bash"
    args:
      - "-c"
      - "printf '%s' \"$$PASSWORD\" | docker login --username=\"$$USERNAME\" --password-stdin"
    secretEnv: ["USERNAME", "PASSWORD"]
  # Build the image under the project's gcr.io name.
  - name: "gcr.io/cloud-builders/docker"
    timeout: 600s
    args:
      - build
      - "--tag=gcr.io/$PROJECT_ID/pcgl2cache:worker_pcgv2_$TAG_NAME"
      - "--file=./ingest.v2.Dockerfile"
      - .
  # Re-tag the same image under the logged-in user's namespace.
  - name: "gcr.io/cloud-builders/docker"
    entrypoint: "bash"
    args:
      [
        "-c",
        "docker tag gcr.io/$PROJECT_ID/pcgl2cache:worker_pcgv2_$TAG_NAME $$USERNAME/pcgl2cache:worker_pcgv2_$TAG_NAME",
      ]
    secretEnv: ["USERNAME"]
  # Push the re-tagged image.
  - name: "gcr.io/cloud-builders/docker"
    entrypoint: "bash"
    args: ["-c", "docker push $$USERNAME/pcgl2cache:worker_pcgv2_$TAG_NAME"]
    secretEnv: ["USERNAME"]
# Images listed here are pushed by Cloud Build itself once all steps succeed.
images:
  - "gcr.io/$PROJECT_ID/pcgl2cache:worker_pcgv2_$TAG_NAME"
# Secret Manager entries exposed to the steps above through `secretEnv`.
availableSecrets:
  secretManager:
    - versionName: projects/$PROJECT_ID/secrets/docker-password/versions/1
      env: "PASSWORD"
    - versionName: projects/$PROJECT_ID/secrets/docker-username/versions/1
      env: "USERNAME"
10 changes: 0 additions & 10 deletions ingest.Dockerfile

This file was deleted.

8 changes: 8 additions & 0 deletions ingest.v1.Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Ingest/worker image that installs PyChunkedGraph from its `pcgv1` branch.
FROM caveconnectome/pychunkedgraph:base_042124

# NOTE(review): this turns off TLS certificate verification for every git
# operation in the image, including the `pip install git+https://...` below.
# Confirm it is still required before relying on this image.
ENV GIT_SSL_NO_VERIFY=1
# Create a CloudVolume secrets directory under /home/nginx, hand the tree to the
# nginx user, and make it that user's home with bash as the login shell.
RUN mkdir -p /home/nginx/.cloudvolume/secrets && chown -R nginx /home/nginx && usermod -d /home/nginx -s /bin/bash nginx

COPY . /app
# requirements.txt is resolved against the base image's working directory
# (presumably /app — verify against the base image).
# --no-cache-dir on both installs keeps pip's cache out of the image layer.
RUN pip install --no-cache-dir --upgrade -r requirements.txt \
    && pip install --no-cache-dir --upgrade git+https://github.com/CAVEconnectome/PyChunkedGraph.git@pcgv1
13 changes: 13 additions & 0 deletions ingest.v2.Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Ingest/worker image that installs PyChunkedGraph from its `main` branch.
FROM caveconnectome/pychunkedgraph:base_042124
# Put /app/venv/bin first on PATH so `pip`/`python` resolve there when present.
ENV VIRTUAL_ENV=/app/venv
ENV PATH="$VIRTUAL_ENV/bin:$PATH"
# NOTE(review): this turns off TLS certificate verification for every git
# operation in the image, including the `pip install git+https://...` below.
# Confirm it is still required before relying on this image.
ENV GIT_SSL_NO_VERIFY=1
ENV CHUNKEDGRAPH_VERSION=2

# Create a CloudVolume secrets directory under /home/nginx, hand the tree to the
# nginx user, and make it that user's home with bash as the login shell.
RUN mkdir -p /home/nginx/.cloudvolume/secrets && chown -R nginx /home/nginx && usermod -d /home/nginx -s /bin/bash nginx

# Copy only requirements.txt before installing so the dependency layer is
# reused when just the application source changes.
COPY requirements.txt .
# --no-cache-dir on both installs keeps pip's cache out of the image layer.
RUN pip install --upgrade --no-cache-dir -r requirements.txt \
    && pip install --upgrade --no-cache-dir git+https://github.com/CAVEconnectome/PyChunkedGraph.git@main

COPY . /app
2 changes: 1 addition & 1 deletion pcgl2cache/core/attributes.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import numpy as np
from kvdbclient.serializers import NumPyArray
from kvdbclient.serializers import NumPyValue
from kvdbclient.bigtable.attributes import Attribute
from kvdbclient.attributes import _Attribute as Attribute

UINT64 = np.dtype("uint64").newbyteorder("L")
UINT32 = np.dtype("uint32").newbyteorder("L")
Expand Down
14 changes: 7 additions & 7 deletions pcgl2cache/core/features.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from edt import edt
from sklearn import decomposition
from kvdbclient import BigTableClient
from kvdbclient.base import serialize_uint64
from cloudvolume import CloudVolume

from . import attributes
Expand Down Expand Up @@ -103,7 +104,9 @@ def get_remapped_segmentation(self, l2id=None):

def _get_l2_ids(l2vol: L2ChunkVolume, svids: np.array) -> np.array:
if l2vol.cg:
l2ids = l2vol.cg.get_roots(svids, stop_layer=2, time_stamp=l2vol.timestamp)
l2ids = l2vol.cg.get_roots(
svids, stop_layer=2, fail_to_zero=True, time_stamp=l2vol.timestamp
)
layers = l2vol.cg.get_chunk_layers(l2ids)
sv_mask = layers == 1
l2ids[sv_mask] = 0
Expand Down Expand Up @@ -312,10 +315,7 @@ def run_l2cache(


def write_to_db(client: BigTableClient, result_d: dict) -> None:
from kvdbclient.base import Entry
from kvdbclient.base import EntryKey

entries = []
rows = []
for tup in zip(*result_d.values()):
(
l2id,
Expand All @@ -338,5 +338,5 @@ def write_to_db(client: BigTableClient, result_d: dict) -> None:
attributes.PCA: pca_comp,
attributes.PCA_VAL: pca_vals,
}
entries.append(Entry(EntryKey(l2id), val_d))
client.write_entries(entries)
rows.append(client.mutate_row(serialize_uint64(l2id), val_d))
client.write(rows)
13 changes: 6 additions & 7 deletions pcgl2cache/ingest/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from os import environ
from collections import namedtuple
from os import environ

Expand All @@ -6,16 +7,14 @@

_cluster_ingest_config_fields = (
"REDIS_URL",
"BATCH_SIZE",
"L2CACHE_Q_NAME",
"L2CACHE_Q_LIMIT", # these limits ensure the queue won't use too much memory
"L2CACHE_Q_INTERVAL", # sleep interval before queuing the next job when limit is reached
"QUEUE_NAME",
"QUEUE_SIZE", # these limits ensure the queue won't use too much memory
"QUEUE_INTERVAL", # sleep interval before queuing the next job when limit is reached
)
_cluster_ingest_defaults = (
REDIS_URL,
10,
"atomic",
int(environ.get("L2CACHE_Q_LIMIT", 500000)),
"l2",
int(environ.get("QUEUE_SIZE", 1000000)),
60,
)
ClusterIngestConfig = namedtuple(
Expand Down
Loading