|
11 | 11 | }, |
12 | 12 | { |
13 | 13 | "cell_type": "code", |
14 | | - "execution_count": null, |
| 14 | + "execution_count": 1, |
15 | 15 | "metadata": {}, |
16 | 16 | "outputs": [], |
17 | 17 | "source": [ |
18 | | - "from bioscript import AlleleCounter\n", |
19 | 18 | "from bioscript.classifier import DiploidResult, GenotypeClassifier, GenotypeEnum\n", |
20 | 19 | "from bioscript.types import Alleles, VariantCall" |
21 | 20 | ] |
22 | 21 | }, |
23 | 22 | { |
24 | 23 | "cell_type": "code", |
25 | | - "execution_count": null, |
| 24 | + "execution_count": 2, |
26 | 25 | "metadata": {}, |
27 | 26 | "outputs": [], |
28 | 27 | "source": [ |
|
36 | 35 | }, |
37 | 36 | { |
38 | 37 | "cell_type": "code", |
39 | | - "execution_count": null, |
| 38 | + "execution_count": 3, |
40 | 39 | "metadata": {}, |
41 | 40 | "outputs": [], |
42 | 41 | "source": [ |
|
50 | 49 | }, |
51 | 50 | { |
52 | 51 | "cell_type": "code", |
53 | | - "execution_count": null, |
| 52 | + "execution_count": 4, |
54 | 53 | "metadata": {}, |
55 | 54 | "outputs": [], |
56 | 55 | "source": [ |
57 | 56 | "class APOL1Classifier(GenotypeClassifier):\n", |
58 | 57 | " def classify(self, matches) -> DiploidResult:\n", |
59 | | - " g2_counter = AlleleCounter(rs71785313)\n", |
60 | | - " g1_site1_counter = AlleleCounter(rs73885319)\n", |
61 | | - " g1_site2_counter = AlleleCounter(rs60910145)\n", |
| 58 | + " \n", |
| 59 | + " g2_match = matches.get(rs71785313)\n", |
| 60 | + " site1_match = matches.get(rs73885319)\n", |
| 61 | + " site2_match = matches.get(rs60910145)\n", |
62 | 62 | "\n", |
63 | | - " g2_result = g2_counter.count(matches)\n", |
64 | | - " site1_result = g1_site1_counter.count(matches)\n", |
65 | | - " site2_result = g1_site2_counter.count(matches)\n", |
66 | | - "\n", |
67 | | - " has_data = (\n", |
68 | | - " g2_result.genotype is not None\n", |
69 | | - " or site1_result.genotype is not None\n", |
70 | | - " or site2_result.genotype is not None\n", |
71 | | - " )\n", |
| 63 | + " has_data = any(match is not None for match in (g2_match, site1_match, site2_match))\n", |
72 | 64 | " if not has_data:\n", |
73 | 65 | " return DiploidResult(MISSING, MISSING)\n", |
74 | 66 | "\n", |
75 | | - " d_count = g2_result.alt_count\n", |
76 | | - " site1_variants = site1_result.alt_count\n", |
77 | | - " site2_variants = site2_result.alt_count\n", |
| 67 | + " d_count = g2_match.alt_count if g2_match else 0\n", |
| 68 | + " site1_variants = site1_match.alt_count if site1_match else 0\n", |
| 69 | + " site2_variants = site2_match.alt_count if site2_match else 0\n", |
78 | 70 | "\n", |
79 | 71 | " has_g1 = site1_variants > 0 and site2_variants > 0\n", |
80 | 72 | " g1_total = site1_variants + site2_variants if has_g1 else 0\n", |
|
97 | 89 | }, |
98 | 90 | { |
99 | 91 | "cell_type": "code", |
100 | | - "execution_count": null, |
| 92 | + "execution_count": 5, |
101 | 93 | "metadata": {}, |
102 | 94 | "outputs": [], |
103 | 95 | "source": [ |
104 | 96 | "__bioscript__ = {\n", |
105 | 97 | " \"variant_calls\": [rs73885319, rs60910145, rs71785313],\n", |
106 | | - " \"classifier\": APOL1Classifier(),\n", |
| 98 | + " \"classifier\": APOL1Classifier,\n", |
107 | 99 | " \"name\": \"APOL1\",\n", |
108 | 100 | "}" |
109 | 101 | ] |
|
119 | 111 | }, |
120 | 112 | { |
121 | 113 | "cell_type": "code", |
122 | | - "execution_count": null, |
| 114 | + "execution_count": 6, |
123 | 115 | "metadata": {}, |
124 | 116 | "outputs": [], |
125 | 117 | "source": [ |
|
138 | 130 | }, |
139 | 131 | { |
140 | 132 | "cell_type": "code", |
141 | | - "execution_count": null, |
| 133 | + "execution_count": 7, |
142 | 134 | "metadata": {}, |
143 | 135 | "outputs": [], |
144 | 136 | "source": [ |
|
152 | 144 | }, |
153 | 145 | { |
154 | 146 | "cell_type": "code", |
155 | | - "execution_count": null, |
| 147 | + "execution_count": 8, |
156 | 148 | "metadata": {}, |
157 | 149 | "outputs": [], |
158 | 150 | "source": [ |
|
175 | 167 | }, |
176 | 168 | { |
177 | 169 | "cell_type": "code", |
178 | | - "execution_count": null, |
| 170 | + "execution_count": 9, |
179 | 171 | "metadata": {}, |
180 | | - "outputs": [], |
| 172 | + "outputs": [ |
| 173 | + { |
| 174 | + "name": "stdout", |
| 175 | + "output_type": "stream", |
| 176 | + "text": [ |
| 177 | + "✓ All tests passed!\n" |
| 178 | + ] |
| 179 | + } |
| 180 | + ], |
181 | 181 | "source": [ |
182 | 182 | "# Run tests\n", |
183 | 183 | "test_g0_homozygous()\n", |
184 | 184 | "test_g1_homozygous()\n", |
185 | 185 | "print(\"✓ All tests passed!\")" |
186 | 186 | ] |
187 | 187 | }, |
188 | | - { |
189 | | - "cell_type": "markdown", |
190 | | - "metadata": {}, |
191 | | - "source": [ |
192 | | - "## Export to Python Module\n", |
193 | | - "\n", |
194 | | - "Export this notebook to a Python file:\n", |
195 | | - "\n", |
196 | | - "```bash\n", |
197 | | - "bioscript export apol1_dev.ipynb -o classify_apol1_exported.py\n", |
198 | | - "```\n", |
199 | | - "\n", |
200 | | - "Or in Python:\n", |
201 | | - "\n", |
202 | | - "```python\n", |
203 | | - "from bioscript import export_from_notebook\n", |
204 | | - "export_from_notebook(\"apol1_dev.ipynb\", \"classify_apol1_exported.py\")\n", |
205 | | - "```" |
206 | | - ] |
207 | | - }, |
208 | 188 | { |
209 | 189 | "cell_type": "code", |
210 | | - "execution_count": null, |
| 190 | + "execution_count": 10, |
211 | 191 | "metadata": {}, |
212 | | - "outputs": [], |
| 192 | + "outputs": [ |
| 193 | + { |
| 194 | + "data": { |
| 195 | + "text/plain": [ |
| 196 | + "PosixPath('classify_apol1.py')" |
| 197 | + ] |
| 198 | + }, |
| 199 | + "execution_count": 10, |
| 200 | + "metadata": {}, |
| 201 | + "output_type": "execute_result" |
| 202 | + } |
| 203 | + ], |
213 | 204 | "source": [ |
214 | 205 | "from bioscript import export_from_notebook\n", |
215 | | - "export_from_notebook(\"apol1_dev.ipynb\", \"classify_apol1_exported.py\")" |
| 206 | + "export_from_notebook(\"apol1_dev.ipynb\", \"classify_apol1.py\")" |
216 | 207 | ] |
217 | 208 | }, |
218 | 209 | { |
219 | 210 | "cell_type": "code", |
220 | | - "execution_count": null, |
| 211 | + "execution_count": 11, |
221 | 212 | "metadata": {}, |
222 | | - "outputs": [], |
| 213 | + "outputs": [ |
| 214 | + { |
| 215 | + "name": "stdout", |
| 216 | + "output_type": "stream", |
| 217 | + "text": [ |
| 218 | + "\n", |
| 219 | + "============================================================\n", |
| 220 | + "Testing: classify_apol1.py\n", |
| 221 | + "============================================================\n", |
| 222 | + "Running tests with pytest: classify_apol1.py\n", |
| 223 | + "\u001b[1m============================= test session starts ==============================\u001b[0m\n", |
| 224 | + "platform darwin -- Python 3.12.7, pytest-8.4.2, pluggy-1.6.0 -- /Users/madhavajay/dev/bioscript/workspace1/.venv/bin/python3\n", |
| 225 | + "cachedir: .pytest_cache\n", |
| 226 | + "rootdir: /Users/madhavajay/dev/bioscript/workspace1/examples/apol1\n", |
| 227 | + "plugins: anyio-4.11.0\n", |
| 228 | + "collected 2 items \u001b[0m\n", |
| 229 | + "\n", |
| 230 | + "classify_apol1.py::test_g0_homozygous \u001b[32mPASSED\u001b[0m\u001b[32m [ 50%]\u001b[0m\n", |
| 231 | + "classify_apol1.py::test_g1_homozygous \u001b[32mPASSED\u001b[0m\u001b[32m [100%]\u001b[0m\n", |
| 232 | + "\n", |
| 233 | + "\u001b[32m============================== \u001b[32m\u001b[1m2 passed\u001b[0m\u001b[32m in 0.00s\u001b[0m\u001b[32m ===============================\u001b[0m\n" |
| 234 | + ] |
| 235 | + } |
| 236 | + ], |
223 | 237 | "source": [ |
224 | | - "!bioscript test classify_apol1_exported.py" |
| 238 | + "!bioscript test classify_apol1.py" |
225 | 239 | ] |
226 | 240 | }, |
227 | 241 | { |
|
0 commit comments