|
27 | 27 | "import pandas as pd\n", |
28 | 28 | "from bioscript import optional_int, optional_str, write_tsv\n", |
29 | 29 | "from bioscript.classifier import GenotypeClassifier\n", |
30 | | - "from bioscript.types import VariantCall" |
| 30 | + "from bioscript.types import VariantCall\n", |
| 31 | + "from bioscript import assets_dir" |
| 32 | + ] |
| 33 | + }, |
| 34 | + { |
| 35 | + "cell_type": "code", |
| 36 | + "execution_count": null, |
| 37 | + "metadata": {}, |
| 38 | + "outputs": [], |
| 39 | + "source": [ |
| 40 | + "ASSETS_DIR = assets_dir()\n", |
| 41 | + "RESULT_HEADERS = [\n", |
| 42 | + " \"participant_id\",\n", |
| 43 | + " \"filename\",\n", |
| 44 | + " \"gene\",\n", |
| 45 | + " \"rsid\",\n", |
| 46 | + " \"chromosome\",\n", |
| 47 | + " \"position\",\n", |
| 48 | + " \"genotype\",\n", |
| 49 | + " \"ref\",\n", |
| 50 | + " \"alt\",\n", |
| 51 | + " \"variant_type\",\n", |
| 52 | + " \"match_type\",\n", |
| 53 | + "]" |
31 | 54 | ] |
32 | 55 | }, |
33 | 56 | { |
|
61 | 84 | "source": [ |
62 | 85 | "def get_vcs() -> list[VariantCall]:\n", |
63 | 86 | " \"\"\"Load BRCA1 and BRCA2 variant calls from ClinVar TSV files.\"\"\"\n", |
64 | | - " dfs = [pd.read_csv(f, sep=\"\\t\") for f in [\"brca1_clinvar.tsv\", \"brca2_clinvar.tsv\"]]\n", |
| 87 | + " data_files = [ASSETS_DIR / name for name in [\"brca1_clinvar.tsv\", \"brca2_clinvar.tsv\"]]\n", |
| 88 | + " dfs = [pd.read_csv(f, sep=\"\\t\") for f in data_files]\n", |
65 | 89 | " df = pd.concat(dfs, ignore_index=True)\n", |
66 | 90 | " print(f\"Loaded {len(df)} variants from BRCA1 and BRCA2\")\n", |
67 | 91 | " return generate_variant_calls(df)" |
|
88 | 112 | " write_tsv(f\"{self.output_basename}_ref.tsv\", ref_rows)\n", |
89 | 113 | " write_tsv(f\"{self.output_basename}_no.tsv\", no_rows)\n", |
90 | 114 | "\n", |
91 | | - " write_tsv(f\"{self.output_basename}.tsv\", var_rows)\n", |
| 115 | + " write_tsv(f\"{self.output_basename}.tsv\", var_rows, headers=RESULT_HEADERS)\n", |
92 | 116 | " \n", |
93 | 117 | " # Return variant rows for testing\n", |
94 | 118 | " return var_rows" |
|
363 | 387 | "pipeline\n" |
364 | 388 | ] |
365 | 389 | }, |
366 | | - { |
367 | | - "cell_type": "code", |
368 | | - "execution_count": null, |
369 | | - "metadata": {}, |
370 | | - "outputs": [], |
371 | | - "source": [] |
372 | | - }, |
373 | | - { |
374 | | - "cell_type": "code", |
375 | | - "execution_count": null, |
376 | | - "metadata": {}, |
377 | | - "outputs": [], |
378 | | - "source": [] |
379 | | - }, |
380 | 390 | { |
381 | 391 | "cell_type": "code", |
382 | 392 | "execution_count": null, |
|
0 commit comments