Skip to content

Commit fe16267

Browse files
authored
Merge pull request #6 from OpenMined/madhava/panel_bc
Madhava/panel bc
2 parents c1e3df1 + 81b202e commit fe16267

37 files changed

+7229
-501
lines changed

.gitignore

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,4 +59,9 @@ uv.lock
5959
# Ruff
6060
.ruff_cache/
6161

62-
.claude/*
62+
.claude/*
63+
notebooks/downloads/*
64+
notebooks/work/*
65+
**/*.ipynb_checkpoints
66+
examples/herc2/herc2_*.tsv
67+
examples/herc2/classify_herc2_*.py

docker/build.sh

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,8 @@ cd "$SCRIPT_DIR/.."
1313
PLATFORMS=${PLATFORMS:-linux/amd64,linux/arm64}
1414
BUILDER_NAME=${BUILDER_NAME:-bioscript-builder}
1515
REMOTE_IMAGE=${REMOTE_IMAGE:-ghcr.io/openmined/bioscript}
16-
OUTPUT_MODE=${OUTPUT_MODE:-push} # push|oci
16+
# By default, produce a multi-arch OCI archive instead of pushing
17+
OUTPUT_MODE=${OUTPUT_MODE:-oci} # oci|push
1718
OUTPUT_DEST="${OUTPUT_DEST:-}"
1819
LOAD_PLATFORM=${LOAD_PLATFORM:-auto} # auto|none|<platform>
1920
VERIFY_MANIFEST=${VERIFY_MANIFEST:-0}
@@ -66,7 +67,7 @@ case "$OUTPUT_MODE" in
6667
BUILD_CMD+=( --output "type=oci,dest=${OUTPUT_DEST}" )
6768
;;
6869
*)
69-
echo "Unsupported OUTPUT_MODE: $OUTPUT_MODE" >&2
70+
echo "Unsupported OUTPUT_MODE: $OUTPUT_MODE (expected 'oci' or 'push')" >&2
7071
exit 1
7172
;;
7273
esac

examples/apol1/apol1_dev.ipynb

Lines changed: 67 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -11,18 +11,17 @@
1111
},
1212
{
1313
"cell_type": "code",
14-
"execution_count": null,
14+
"execution_count": 1,
1515
"metadata": {},
1616
"outputs": [],
1717
"source": [
18-
"from bioscript import AlleleCounter\n",
1918
"from bioscript.classifier import DiploidResult, GenotypeClassifier, GenotypeEnum\n",
2019
"from bioscript.types import Alleles, VariantCall"
2120
]
2221
},
2322
{
2423
"cell_type": "code",
25-
"execution_count": null,
24+
"execution_count": 2,
2625
"metadata": {},
2726
"outputs": [],
2827
"source": [
@@ -36,7 +35,7 @@
3635
},
3736
{
3837
"cell_type": "code",
39-
"execution_count": null,
38+
"execution_count": 3,
4039
"metadata": {},
4140
"outputs": [],
4241
"source": [
@@ -50,31 +49,24 @@
5049
},
5150
{
5251
"cell_type": "code",
53-
"execution_count": null,
52+
"execution_count": 4,
5453
"metadata": {},
5554
"outputs": [],
5655
"source": [
5756
"class APOL1Classifier(GenotypeClassifier):\n",
5857
" def classify(self, matches) -> DiploidResult:\n",
59-
" g2_counter = AlleleCounter(rs71785313)\n",
60-
" g1_site1_counter = AlleleCounter(rs73885319)\n",
61-
" g1_site2_counter = AlleleCounter(rs60910145)\n",
58+
" \n",
59+
" g2_match = matches.get(rs71785313)\n",
60+
" site1_match = matches.get(rs73885319)\n",
61+
" site2_match = matches.get(rs60910145)\n",
6262
"\n",
63-
" g2_result = g2_counter.count(matches)\n",
64-
" site1_result = g1_site1_counter.count(matches)\n",
65-
" site2_result = g1_site2_counter.count(matches)\n",
66-
"\n",
67-
" has_data = (\n",
68-
" g2_result.genotype is not None\n",
69-
" or site1_result.genotype is not None\n",
70-
" or site2_result.genotype is not None\n",
71-
" )\n",
63+
" has_data = any(match is not None for match in (g2_match, site1_match, site2_match))\n",
7264
" if not has_data:\n",
7365
" return DiploidResult(MISSING, MISSING)\n",
7466
"\n",
75-
" d_count = g2_result.alt_count\n",
76-
" site1_variants = site1_result.alt_count\n",
77-
" site2_variants = site2_result.alt_count\n",
67+
" d_count = g2_match.alt_count if g2_match else 0\n",
68+
" site1_variants = site1_match.alt_count if site1_match else 0\n",
69+
" site2_variants = site2_match.alt_count if site2_match else 0\n",
7870
"\n",
7971
" has_g1 = site1_variants > 0 and site2_variants > 0\n",
8072
" g1_total = site1_variants + site2_variants if has_g1 else 0\n",
@@ -97,13 +89,13 @@
9789
},
9890
{
9991
"cell_type": "code",
100-
"execution_count": null,
92+
"execution_count": 5,
10193
"metadata": {},
10294
"outputs": [],
10395
"source": [
10496
"__bioscript__ = {\n",
10597
" \"variant_calls\": [rs73885319, rs60910145, rs71785313],\n",
106-
" \"classifier\": APOL1Classifier(),\n",
98+
" \"classifier\": APOL1Classifier,\n",
10799
" \"name\": \"APOL1\",\n",
108100
"}"
109101
]
@@ -119,7 +111,7 @@
119111
},
120112
{
121113
"cell_type": "code",
122-
"execution_count": null,
114+
"execution_count": 6,
123115
"metadata": {},
124116
"outputs": [],
125117
"source": [
@@ -138,7 +130,7 @@
138130
},
139131
{
140132
"cell_type": "code",
141-
"execution_count": null,
133+
"execution_count": 7,
142134
"metadata": {},
143135
"outputs": [],
144136
"source": [
@@ -152,7 +144,7 @@
152144
},
153145
{
154146
"cell_type": "code",
155-
"execution_count": null,
147+
"execution_count": 8,
156148
"metadata": {},
157149
"outputs": [],
158150
"source": [
@@ -175,53 +167,75 @@
175167
},
176168
{
177169
"cell_type": "code",
178-
"execution_count": null,
170+
"execution_count": 9,
179171
"metadata": {},
180-
"outputs": [],
172+
"outputs": [
173+
{
174+
"name": "stdout",
175+
"output_type": "stream",
176+
"text": [
177+
"✓ All tests passed!\n"
178+
]
179+
}
180+
],
181181
"source": [
182182
"# Run tests\n",
183183
"test_g0_homozygous()\n",
184184
"test_g1_homozygous()\n",
185185
"print(\"✓ All tests passed!\")"
186186
]
187187
},
188-
{
189-
"cell_type": "markdown",
190-
"metadata": {},
191-
"source": [
192-
"## Export to Python Module\n",
193-
"\n",
194-
"Export this notebook to a Python file:\n",
195-
"\n",
196-
"```bash\n",
197-
"bioscript export apol1_dev.ipynb -o classify_apol1_exported.py\n",
198-
"```\n",
199-
"\n",
200-
"Or in Python:\n",
201-
"\n",
202-
"```python\n",
203-
"from bioscript import export_from_notebook\n",
204-
"export_from_notebook(\"apol1_dev.ipynb\", \"classify_apol1_exported.py\")\n",
205-
"```"
206-
]
207-
},
208188
{
209189
"cell_type": "code",
210-
"execution_count": null,
190+
"execution_count": 10,
211191
"metadata": {},
212-
"outputs": [],
192+
"outputs": [
193+
{
194+
"data": {
195+
"text/plain": [
196+
"PosixPath('classify_apol1.py')"
197+
]
198+
},
199+
"execution_count": 10,
200+
"metadata": {},
201+
"output_type": "execute_result"
202+
}
203+
],
213204
"source": [
214205
"from bioscript import export_from_notebook\n",
215-
"export_from_notebook(\"apol1_dev.ipynb\", \"classify_apol1_exported.py\")"
206+
"export_from_notebook(\"apol1_dev.ipynb\", \"classify_apol1.py\")"
216207
]
217208
},
218209
{
219210
"cell_type": "code",
220-
"execution_count": null,
211+
"execution_count": 11,
221212
"metadata": {},
222-
"outputs": [],
213+
"outputs": [
214+
{
215+
"name": "stdout",
216+
"output_type": "stream",
217+
"text": [
218+
"\n",
219+
"============================================================\n",
220+
"Testing: classify_apol1.py\n",
221+
"============================================================\n",
222+
"Running tests with pytest: classify_apol1.py\n",
223+
"\u001b[1m============================= test session starts ==============================\u001b[0m\n",
224+
"platform darwin -- Python 3.12.7, pytest-8.4.2, pluggy-1.6.0 -- /Users/madhavajay/dev/bioscript/workspace1/.venv/bin/python3\n",
225+
"cachedir: .pytest_cache\n",
226+
"rootdir: /Users/madhavajay/dev/bioscript/workspace1/examples/apol1\n",
227+
"plugins: anyio-4.11.0\n",
228+
"collected 2 items \u001b[0m\n",
229+
"\n",
230+
"classify_apol1.py::test_g0_homozygous \u001b[32mPASSED\u001b[0m\u001b[32m [ 50%]\u001b[0m\n",
231+
"classify_apol1.py::test_g1_homozygous \u001b[32mPASSED\u001b[0m\u001b[32m [100%]\u001b[0m\n",
232+
"\n",
233+
"\u001b[32m============================== \u001b[32m\u001b[1m2 passed\u001b[0m\u001b[32m in 0.00s\u001b[0m\u001b[32m ===============================\u001b[0m\n"
234+
]
235+
}
236+
],
223237
"source": [
224-
"!bioscript test classify_apol1_exported.py"
238+
"!bioscript test classify_apol1.py"
225239
]
226240
},
227241
{

0 commit comments

Comments
 (0)