Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 84 additions & 0 deletions data/catalogs/STRchive-disease-loci.T2T-chm13.atarva.bed
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
#chrom start stop motif motif_len id
chr1 870158 870178 GGCGCGGAGC 10 HMNR7_VWA1
chr1 57245935 57245973 GAAAT 5 SCA37_DAB1
chr1 94266544 94266567 GCC 3 OPDM_ABCD3
chr1 148519695 148519738 GGC 3 NIID_NOTCH2NLC
chr1 154328121 154330802 GGCTNNGGGGNGCGGTGGAGCCCGGGGCNGGNCTGNTNTCCGGGGCCGAGGTGACANCNTG 61 ADTKD_MUC1
chr1 155728131 155728159 GGGCC 5 NME_NAXE
chr2 96703674 96703732 AAATG 5 FAME2_STARD7
chr2 100563685 100563738 GCC 3 FRA2A_AFF3
chr2 176581179 176581224 GCN 3 SD5_HOXD13
chr2 191369982 191370024 GCA 3 GDPAG_GLS
chr3 63956302 63956333 CAG 3 SCA7_ATXN7
chr3 63956333 63956345 CCG 3 SCA7_ATXN7_flank
chr3 131917482 131917557 CAGG 4 DM2_CNBP
chr3 131917557 131917597 CAGA 4 DM2_CNBP_flank
chr3 131917597 131917635 CA 2 DM2_CNBP_flank
chr3 141687011 141687054 NGC 3 BPES_FOXL2
chr3 186521667 186521706 TTTCA 5 FAME4_YEATS2
chr4 3073603 3073687 CAG 3 HD_HTT
chr4 3073687 3073693 CAACAG 6 HD_HTT_flank
chr4 3073693 3073729 CCG 3 HD_HTT_flank
chr4 39318077 39318136 AAGGG 5 CANVAS_RFC1
chr4 41719745 41719805 GCN 3 CCHS_PHOX2B
chr4 162693303 162693405 TTTCA 5 FAME7_RAPGEF2
chr5 10295525 10295593 TTTCA 5 FAME3_MARCHF6
chr5 147414733 147414780 GCT 3 SCA12_PPP2R2B
chr6 16200188 16200282 CTG 3 SCA1_ATXN1
chr6 45257567 45257618 GCN 3 CCD_RUNX2
chr6 171935458 171935569 GCA 3 SCA17_TBP
chr7 27335684 27335720 NGC 3 HFG_HOXA13-III
chr7 27335813 27335849 NGC 3 HFG_HOXA13-II
chr7 27335912 27335954 NGC 3 HFG_HOXA13-I
chr7 56047900 56047939 GCG 3 FRA7A_ZNF713
chr8 105716409 105716441 CGC 3 OPDM1_LRP12
chr8 119495247 119495353 TGAAA 5 FAME1_SAMD12
chr9 27584063 27584155 GGCCCC 6 FTDALS1_C9orf72
chr9 81210834 81210861 GAA 3 FRDA_FXN
chr9 81210861 81210877 A 1 FRDA_FXN_flank
chr9 142886568 142886595 GCC 3 HSAN-VIII_PRDM12
chr10 80695718 80695748 GGC 3 OPML1_NUTM2B-AS1
chr11 119226662 119226696 CGG 3 JBS_CBL
chr12 6947903 6947941 CAG 3 DRPLA_ATN1
chr12 50468095 50468118 GGC 3 FRA12A_DIP2B
chr12 111575873 111575940 CTG 3 SCA2_ATXN2
chr12 123532573 123532603 GGC 3 OPDM4_RILPL1
chr13 69361243 69361270 CTG 3 SCA8_ATXN8OS
chr13 69361270 69361300 CTA 3 SCA8_ATXN8OS_flank
chr13 99196358 99196404 GCN 3 HPE5_ZIC2
chr13 101377549 101377792 GAA 3 SCA27B_FGF14
chr14 17522488 17522519 GCN 3 OPMD_PABPN1
chr14 86300519 86300603 CTG 3 SCA3_ATXN3
chr15 20458510 20458536 GCG 3 ALS1_NIPA1
chr15 86324038 86324057 TTTG 4 pre-MIR7-2_CHNG3
chr15 87088411 87088452 GCT 3 CPEO_POLG
chr16 17477909 17478002 GCC 3 DBQD2_XYLT1
chr16 24890366 24890430 TTTCA 5 FAME6_TNRC6A
chr16 72284666 72284761 TGGAA 5 SCA31_BEAN1
chr16 73638636 73638724 CAG 3 SCA_THAP11
chr16 78605502 78605569 GCC 3 SCA4_ZFHX3
chr16 93675723 93675776 CTG 3 HDL2_JPH3
chr17 17754961 17755053 TTTCA 5 FAME8_RAI1
chr17 81047404 81047534 CCTCGCTGTGCCGCTGCCGA 20 RCPS_EIF4A3
chr18 55789233 55789288 CAG 3 FECD3_TCF4
chr19 13333136 13333176 CTG 3 SCA6_CACNA1A
chr19 14622655 14622692 CCG 3 OPDM2_GIPC1
chr19 18921630 18921645 GTC 3 EDM1-PSACH_COMP
chr19 48597739 48597756 CAG 3 DM1_DMPK
chr20 2683189 2683230 GGCCTG 6 SCA36_NOP56
chr20 2683230 2683248 CGCCTG 6 SCA36_NOP56_flank
chr20 4738633 4738705 CCTCATGGTGGTGGCTGGGGGCAG 24 CJD_PRNP
chr20 4738705 4738732 CCTCAGGGCGGTGGTGGCTGGGGGCAG 27 CJD_PRNP_flank
chr21 42132054 42132091 CGCGGGGCGGGG 12 EPM1_CSTB
chr22 20143615 20143660 GCN 3 TOF_TBX1
chr22 46280059 46280134 ATTCT 5 SCA10_ATXN10
chrX 24597766 24597802 NGC 3 PRTS_ARX
chrX 24597886 24597934 NGC 3 EIEE1_ARX
chrX 30882677 30882743 TTC 3 DMD_DMD
chrX 30882743 30882751 T 1 DMD_DMD_flank
chrX 65975147 65975250 GCA 3 SBMA_AR
chrX 69887153 69887230 AGAGGG 6 XDP_TAF1
chrX 135876774 135876804 GCN 3 VACTERLX_ZIC3
chrX 138816203 138816248 NGC 3 XLMR_SOX3
chrX 146176677 146176769 CGG 3 FXS_FMR1
chrX 146765190 146765342 GCC 3 FRAXE_AFF2
Binary file not shown.
Binary file not shown.
84 changes: 84 additions & 0 deletions data/catalogs/STRchive-disease-loci.hg19.atarva.bed
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
#chrom start stop motif motif_len id
chr1 1371178 1371198 GGCGCGGAGC 10 HMNR7_VWA1
chr1 57832715 57832793 GAAAT 5 SCA37_DAB1
chr1 94883977 94884000 GCC 3 OPDM_ABCD3
chr1 145209323 145209354 GGC 3 NIID_NOTCH2NLC
chr1 155160981 155162030 GGCTNNGGGGNGCGGTGGAGCCCGGGGCNGGNCTGNTNTCCGGGGCCGAGGTGACANCNTG 61 ADTKD_MUC1
chr1 156561557 156561575 GGGCC 5 NME_NAXE
chr2 96862804 96862862 AAATG 5 FAME2_STARD7
chr2 100721260 100721286 GCC 3 FRA2A_AFF3
chr2 176957786 176957831 GCN 3 SD5_HOXD13
chr2 191745598 191745646 GCA 3 GDPAG_GLS
chr3 63898360 63898391 CAG 3 SCA7_ATXN7
chr3 63898391 63898403 CCG 3 SCA7_ATXN7_flank
chr3 128891419 128891499 CAGG 4 DM2_CNBP
chr3 128891499 128891539 CAGA 4 DM2_CNBP_flank
chr3 128891539 128891577 CA 2 DM2_CNBP_flank
chr3 138664861 138664904 NGC 3 BPES_FOXL2
chr3 183429975 183430014 TTTCA 5 FAME4_YEATS2
chr4 3076603 3076660 CAG 3 HD_HTT
chr4 3076660 3076666 CAACAG 6 HD_HTT_flank
chr4 3076666 3076702 CCG 3 HD_HTT_flank
chr4 39350044 39350103 AAGGG 5 CANVAS_RFC1
chr4 41747989 41748049 GCN 3 CCHS_PHOX2B
chr4 160263678 160263770 TTTCA 5 FAME7_RAPGEF2
chr5 10356455 10356523 TTTCA 5 FAME3_MARCHF6
chr5 146258290 146258322 GCT 3 SCA12_PPP2R2B
chr6 16327864 16327955 CTG 3 SCA1_ATXN1
chr6 45390487 45390538 GCN 3 CCD_RUNX2
chr6 170870994 170871105 GCA 3 SCA17_TBP
chr7 27239297 27239351 NGC 3 HFG_HOXA13-III
chr7 27239444 27239480 NGC 3 HFG_HOXA13-II
chr7 27239543 27239585 NGC 3 HFG_HOXA13-I
chr7 55955293 55955332 GCG 3 FRA7A_ZNF713
chr8 105601198 105601227 CGC 3 OPDM1_LRP12
chr8 119379051 119379157 TGAAA 5 FAME1_SAMD12
chr9 27573482 27573544 GGCCCC 6 FTDALS1_C9orf72
chr9 71652202 71652220 GAA 3 FRDA_FXN
chr9 71652220 71652236 A 1 FRDA_FXN_flank
chr9 133556992 133557028 GCC 3 HSAN-VIII_PRDM12
chr10 81586139 81586160 GGC 3 OPML1_NUTM2B-AS1
chr11 119076999 119077033 CGG 3 JBS_CBL
chr12 7045879 7045938 CAG 3 DRPLA_ATN1
chr12 50898784 50898807 GGC 3 FRA12A_DIP2B
chr12 112036753 112036823 CTG 3 SCA2_ATXN2
chr12 124018267 124018297 GGC 3 OPDM4_RILPL1
chr13 70713515 70713561 CTG 3 SCA8_ATXN8OS
chr13 70713561 70713591 CTA 3 SCA8_ATXN8OS_flank
chr13 100637702 100637748 GCN 3 HPE5_ZIC2
chr13 102813924 102814076 GAA 3 SCA27B_FGF14
chr14 23790681 23790712 GCN 3 OPMD_PABPN1
chr14 92537354 92537396 CTG 3 SCA3_ATXN3
chr15 23086363 23086389 GCG 3 ALS1_NIPA1
chr15 89112664 89112683 TTTG 4 pre-MIR7-2_CHNG3
chr15 89876819 89876860 GCT 3 CPEO_POLG
chr16 17564764 17564779 GCC 3 DBQD2_XYLT1
chr16 24624759 24624853 TTTCA 5 FAME6_TNRC6A
chr16 66524299 66524369 TGGAA 5 SCA31_BEAN1
chr16 67876765 67876853 CAG 3 SCA_THAP11
chr16 72821593 72821657 GCC 3 SCA4_ZFHX3
chr16 87637888 87637935 CTG 3 HDL2_JPH3
chr17 17711672 17711774 TTTCA 5 FAME8_RAI1
chr17 78120808 78120938 CCTCGCTGTGCCGCTGCCGA 20 RCPS_EIF4A3
chr18 53253384 53253460 CAG 3 FECD3_TCF4
chr19 13318672 13318712 CTG 3 SCA6_CACNA1A
chr19 14606853 14606887 CCG 3 OPDM2_GIPC1
chr19 18896844 18896860 GTC 3 EDM1-PSACH_COMP
chr19 46273462 46273524 CAG 3 DM1_DMPK
chr20 2633378 2633403 GGCCTG 6 SCA36_NOP56
chr20 2633403 2633421 CGCCTG 6 SCA36_NOP56_flank
chr20 4680043 4680139 CCTCATGGTGGTGGCTGGGGGCAG 24 CJD_PRNP
chr20 4680139 4680166 CCTCAGGGCGGTGGTGGCTGGGGGCAG 27 CJD_PRNP_flank
chr21 45196323 45196360 CGCGGGGCGGGG 12 EPM1_CSTB
chr22 19754285 19754330 GCN 3 TOF_TBX1
chr22 46191234 46191304 ATTCT 5 SCA10_ATXN10
chrX 25031646 25031682 NGC 3 PRTS_ARX
chrX 25031766 25031814 NGC 3 EIEE1_ARX
chrX 31302674 31302722 TTC 3 DMD_DMD
chrX 31302722 31302730 T 1 DMD_DMD_flank
chrX 66765158 66765261 GCA 3 SBMA_AR
chrX 70672904 70672981 AGAGGG 6 XDP_TAF1
chrX 136648985 136649015 GCN 3 VACTERLX_ZIC3
chrX 139586481 139586526 NGC 3 XLMR_SOX3
chrX 146993567 146993629 CGG 3 FXS_FMR1
chrX 147582124 147582273 GCC 3 FRAXE_AFF2
Binary file not shown.
Binary file not shown.
84 changes: 84 additions & 0 deletions data/catalogs/STRchive-disease-loci.hg38.atarva.bed
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
#chrom start stop motif motif_len id
chr1 1435798 1435818 GGCGCGGAGC 10 HMNR7_VWA1
chr1 57367043 57367121 GAAAT 5 SCA37_DAB1
chr1 94418421 94418444 GCC 3 OPDM_ABCD3
chr1 149390802 149390842 GGC 3 NIID_NOTCH2NLC
chr1 155188505 155192239 GGCTNNGGGGNGCGGTGGAGCCCGGGGCNGGNCTGNTNTCCGGGGCCGAGGTGACANCNTG 61 ADTKD_MUC1
chr1 156591765 156591783 GGGCC 5 NME_NAXE
chr2 96197066 96197124 AAATG 5 FAME2_STARD7
chr2 100104798 100104824 GCC 3 FRA2A_AFF3
chr2 176093058 176093103 GCN 3 SD5_HOXD13
chr2 190880872 190880920 GCA 3 GDPAG_GLS
chr3 63912684 63912715 CAG 3 SCA7_ATXN7
chr3 63912715 63912727 CCG 3 SCA7_ATXN7_flank
chr3 129172576 129172656 CAGG 4 DM2_CNBP
chr3 129172656 129172696 CAGA 4 DM2_CNBP_flank
chr3 129172696 129172734 CA 2 DM2_CNBP_flank
chr3 138946019 138946062 NGC 3 BPES_FOXL2
chr3 183712187 183712226 TTTCA 5 FAME4_YEATS2
chr4 3074876 3074933 CAG 3 HD_HTT
chr4 3074933 3074939 CAACAG 6 HD_HTT_flank
chr4 3074939 3074975 CCG 3 HD_HTT_flank
chr4 39348424 39348483 AAGGG 5 CANVAS_RFC1
chr4 41745972 41746032 GCN 3 CCHS_PHOX2B
chr4 159342526 159342618 TTTCA 5 FAME7_RAPGEF2
chr5 10356343 10356411 TTTCA 5 FAME3_MARCHF6
chr5 146878727 146878759 GCT 3 SCA12_PPP2R2B
chr6 16327633 16327724 CTG 3 SCA1_ATXN1
chr6 45422750 45422801 GCN 3 CCD_RUNX2
chr6 170561906 170562017 GCA 3 SCA17_TBP
chr7 27199678 27199732 NGC 3 HFG_HOXA13-III
chr7 27199825 27199861 NGC 3 HFG_HOXA13-II
chr7 27199924 27199966 NGC 3 HFG_HOXA13-I
chr7 55887600 55887639 GCG 3 FRA7A_ZNF713
chr8 104588970 104588999 CGC 3 OPDM1_LRP12
chr8 118366812 118366918 TGAAA 5 FAME1_SAMD12
chr9 27573484 27573546 GGCCCC 6 FTDALS1_C9orf72
chr9 69037286 69037304 GAA 3 FRDA_FXN
chr9 69037304 69037320 A 1 FRDA_FXN_flank
chr9 130681605 130681641 GCC 3 HSAN-VIII_PRDM12
chr10 79826383 79826404 GGC 3 OPML1_NUTM2B-AS1
chr11 119206289 119206323 CGG 3 JBS_CBL
chr12 6936716 6936775 CAG 3 DRPLA_ATN1
chr12 50505001 50505024 GGC 3 FRA12A_DIP2B
chr12 111598949 111599019 CTG 3 SCA2_ATXN2
chr12 123533720 123533750 GGC 3 OPDM4_RILPL1
chr13 70139383 70139429 CTG 3 SCA8_ATXN8OS
chr13 70139429 70139459 CTA 3 SCA8_ATXN8OS_flank
chr13 99985448 99985494 GCN 3 HPE5_ZIC2
chr13 102161574 102161726 GAA 3 SCA27B_FGF14
chr14 23321472 23321503 GCN 3 OPMD_PABPN1
chr14 92071010 92071052 CTG 3 SCA3_ATXN3
chr15 22786677 22786703 GCG 3 ALS1_NIPA1
chr15 88569433 88569452 TTTG 4 pre-MIR7-2_CHNG3
chr15 89333588 89333629 GCT 3 CPEO_POLG
chr16 17470907 17470922 GCC 3 DBQD2_XYLT1
chr16 24613438 24613532 TTTCA 5 FAME6_TNRC6A
chr16 66490396 66490466 TGGAA 5 SCA31_BEAN1
chr16 67842862 67842950 CAG 3 SCA_THAP11
chr16 72787694 72787758 GCC 3 SCA4_ZFHX3
chr16 87604282 87604329 CTG 3 HDL2_JPH3
chr17 17808358 17808460 TTTCA 5 FAME8_RAI1
chr17 80147009 80147139 CCTCGCTGTGCCGCTGCCGA 20 RCPS_EIF4A3
chr18 55586153 55586229 CAG 3 FECD3_TCF4
chr19 13207858 13207898 CTG 3 SCA6_CACNA1A
chr19 14496041 14496075 CCG 3 OPDM2_GIPC1
chr19 18786034 18786050 GTC 3 EDM1-PSACH_COMP
chr19 45770204 45770266 CAG 3 DM1_DMPK
chr20 2652732 2652757 GGCCTG 6 SCA36_NOP56
chr20 2652757 2652775 CGCCTG 6 SCA36_NOP56_flank
chr20 4699397 4699493 CCTCATGGTGGTGGCTGGGGGCAG 24 CJD_PRNP
chr20 4699493 4699520 CCTCAGGGCGGTGGTGGCTGGGGGCAG 27 CJD_PRNP_flank
chr21 43776442 43776479 CGCGGGGCGGGG 12 EPM1_CSTB
chr22 19766762 19766807 GCN 3 TOF_TBX1
chr22 45795354 45795424 ATTCT 5 SCA10_ATXN10
chrX 25013529 25013565 NGC 3 PRTS_ARX
chrX 25013649 25013697 NGC 3 EIEE1_ARX
chrX 31284557 31284605 TTC 3 DMD_DMD
chrX 31284605 31284613 T 1 DMD_DMD_flank
chrX 67545316 67545419 GCA 3 SBMA_AR
chrX 71453054 71453131 AGAGGG 6 XDP_TAF1
chrX 137566826 137566856 GCN 3 VACTERLX_ZIC3
chrX 140504316 140504361 NGC 3 XLMR_SOX3
chrX 147912049 147912111 CGG 3 FXS_FMR1
chrX 148500604 148500753 GCC 3 FRAXE_AFF2
Binary file not shown.
Binary file not shown.
2 changes: 2 additions & 0 deletions scripts/environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ dependencies:
- r-purrr
- gfortran
- pysam
- htslib
- bedtools
- nodejs # build website locally
- pyliftover
- pip
Expand Down
75 changes: 75 additions & 0 deletions scripts/make-catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,75 @@ def trgt_catalog(row, genome = 'hg38', struc_type = 'default'):

return definition

def atarva_catalog(row, genome = 'hg38'):
    r"""
    Build the atarva catalog entry (or entries) for a single locus.

    :param row: dictionary with STR data for a single locus
    :param genome: genome build (hg19, hg38 or T2T)
    :return: atarva format catalog string which is a modified BED format with fields: chrom start stop motif motif_len [id]

    Note, compound loci will be split into multiple entries, one for each motif. Overlapping loci are okay.
    For loci with multiple pathogenic motifs, only the first motif will be used. Atarva does motif decomposition, so alternate motifs should be detected by the caller.

    The first ``n`` unit in ``flank_motif`` is taken to be the catalogued locus itself
    (``start``..``stop``). Counted units written before it are placed immediately
    upstream of ``start``; counted units written after it are placed immediately
    downstream of ``stop``. Entries are returned in coordinate order. If
    ``flank_motif`` has no ``n`` unit, all units are placed downstream of ``stop``.
    A unit without an explicit count is treated as a single copy. A count that is
    neither ``n`` nor an integer raises ``ValueError``.

    >>> atarva_catalog({'chrom': 'chr1', 'start_hg38': 100, 'stop_hg38': 200, 'pathogenic_motif_reference_orientation': ['CAG'], 'flank_motif': '', 'gene': 'mygene', 'id': 'myid', 'pathogenic_min': 10, 'inheritance': 'AD', 'disease': 'Disease Name'}, 'hg38')
    'chr1\t100\t200\tCAG\t3\tmyid'

    >>> atarva_catalog({'chrom': 'chr1', 'start_hg38': 100, 'stop_hg38': 200, 'pathogenic_motif_reference_orientation': ['AAGGG', 'ACAGG'], 'flank_motif': '', 'gene': 'mygene', 'id': 'myid', 'pathogenic_min': 10, 'inheritance': 'AD', 'disease': 'Disease Name'}, 'hg38')
    'chr1\t100\t200\tAAGGG\t5\tmyid'

    >>> atarva_catalog({'chrom': 'chr1', 'start_hg38': 100, 'stop_hg38': 200, 'pathogenic_motif_reference_orientation': ['CAG'], 'flank_motif': '(CAG)nCAACAG(CCG)12', 'gene': 'mygene', 'id': 'myid', 'pathogenic_min': 10, 'inheritance': 'AD', 'disease': 'Disease Name'}, 'hg38')
    'chr1\t100\t200\tCAG\t3\tmyid\nchr1\t200\t206\tCAACAG\t6\tmyid_flank\nchr1\t206\t242\tCCG\t3\tmyid_flank'

    >>> atarva_catalog({'chrom': 'chr1', 'start_hg38': 100, 'stop_hg38': 200, 'pathogenic_motif_reference_orientation': ['CAG'], 'flank_motif': '(CAG)n(CCG)10(CAA)10', 'gene': 'mygene', 'id': 'myid', 'pathogenic_min': 10, 'inheritance': 'AD', 'disease': 'Disease Name'}, 'hg38')
    'chr1\t100\t200\tCAG\t3\tmyid\nchr1\t200\t230\tCCG\t3\tmyid_flank\nchr1\t230\t260\tCAA\t3\tmyid_flank'

    >>> atarva_catalog({'chrom': 'chr1', 'start_hg38': 130, 'stop_hg38': 175, 'pathogenic_motif_reference_orientation': ['CTG'], 'flank_motif': '(CTA)10(CTG)n', 'gene': 'mygene', 'id': 'myid', 'pathogenic_min': 10, 'inheritance': 'AD', 'disease': 'Disease Name'}, 'hg38')
    'chr1\t100\t130\tCTA\t3\tmyid_flank\nchr1\t130\t175\tCTG\t3\tmyid'
    """
    chrom = row['chrom']
    start = int(row['start_' + genome])
    stop = int(row['stop_' + genome])
    this_id = row['id']

    motif = row['pathogenic_motif_reference_orientation'][0]  # use first motif only
    main_line = f"{chrom}\t{start}\t{stop}\t{motif}\t{len(motif)}\t{this_id}"

    # A missing, None or empty flank definition all mean "no flanking motifs".
    flank_motif = row.get('flank_motif')
    if not flank_motif:
        return main_line

    units = _atarva_flank_units(flank_motif)

    # The first 'n' unit stands for the catalogued locus; it splits the
    # structure into upstream and downstream flanks.
    pivot = next((i for i, (_, count) in enumerate(units) if count is None), None)
    if pivot is None:
        upstream, downstream = [], units
    else:
        upstream, downstream = units[:pivot], units[pivot + 1:]

    def flank_line(flank_start, flank_stop, unit_motif):
        return f"{chrom}\t{flank_start}\t{flank_stop}\t{unit_motif}\t{len(unit_motif)}\t{this_id}_flank"

    lines = []

    # Upstream flanks: tile backwards from the locus start, nearest unit first,
    # then reverse so the final output is in coordinate order.
    flank_stop = start
    for unit_motif, count in reversed(upstream):
        flank_start = flank_stop - count * len(unit_motif)
        lines.append(flank_line(flank_start, flank_stop, unit_motif))
        flank_stop = flank_start
    lines.reverse()

    lines.append(main_line)

    # Downstream flanks: tile forwards from the locus stop.
    flank_start = stop
    for unit_motif, count in downstream:
        if count is None:
            # A further variable-length unit has no fixed size; it is not
            # emitted and does not advance the position.
            continue
        flank_stop = flank_start + count * len(unit_motif)
        lines.append(flank_line(flank_start, flank_stop, unit_motif))
        flank_start = flank_stop

    return '\n'.join(lines)


def _atarva_flank_units(flank_motif):
    """Parse a flank structure such as '(CAG)nCAACAG(CCG)12' into (motif, count) pairs.

    count is None for a variable-length 'n' unit, 1 when no count is written,
    and the integer copy number otherwise.
    """
    # Splitting with a capturing group keeps each motif next to the text that
    # follows it, so motifs and counts cannot drift out of step.
    parts = re.split(r"([ATCGN]+)", flank_motif)
    units = []
    for unit_motif, trailing in zip(parts[1::2], parts[2::2]):
        count = trailing.replace('(', '').replace(')', '').strip()
        if count == 'n':
            units.append((unit_motif, None))
        elif count == '':
            units.append((unit_motif, 1))
        else:
            units.append((unit_motif, int(count)))
    return units


def extended_bed(row, fields = [], genome = 'hg38'):
r"""
:param row: dictionary with STR data for a single locus
Expand Down Expand Up @@ -191,6 +260,12 @@ def main(input: str, output: str, *, format: str = 'TRGT', genome: str = 'hg38',
with open(output, 'w') as out_file:
for row in data:
out_file.write(trgt_catalog(row, genome) + '\n')
elif format.lower() == 'atarva':
with open(output, 'w') as out_file:
header = '#' + '\t'.join(['chrom', 'start', 'stop', 'motif', 'motif_len', 'id']) + '\n'
out_file.write(header)
for row in data:
out_file.write(atarva_catalog(row, genome) + '\n')
elif format.lower() == 'bed':
fields_list = fields.split(',')
header = '#' + '\t'.join(['chrom', 'start', 'stop'] + fields_list) + '\n'
Expand Down
Loading