Skip to content

Commit 36121f8

Browse files
committed
Built on 2026-02-24 V008
1 parent 48c1f4c commit 36121f8

File tree

7 files changed

+201
-81
lines changed

7 files changed

+201
-81
lines changed

.DS_Store

2 KB
Binary file not shown.

segpy/CHANGES.txt

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,5 @@
1+
v0.0.8, 2026-02-22
2+
* Correct issue in sample id.
13
v0.0.5, 2025-09-12
24
* Correct issue in case-control.
35
v0.0.6, 2024-12-12

segpy/segpy/segrun/segrun_family_wise_whole.py

Lines changed: 83 additions & 79 deletions
Original file line number | Diff line number | Diff line change
@@ -83,90 +83,95 @@ def sample_retrieve(mt, ped, outfolder):
8383
##### list of sample in the affected and non-affected family
8484
glb_aff=[ x for x in ped.loc[ped.loc[:,'phenotype']==2,'individualid']]
8585
glb_naf=[ x for x in ped.loc[ped.loc[:,'phenotype']==1,'individualid']]
86-
##### list of family
87-
#################################################### Global affected
88-
glb_aff_sam = hl.literal(hl.set(glb_aff))
89-
glb_aff_sam_mt=mt.filter_cols(glb_aff_sam.contains(mt.s))
90-
# fam_aff_mt.count()
91-
## wildtype
92-
glb_aff_sam_mt = glb_aff_sam_mt.annotate_rows(glb_aff_r= glb_aff)
93-
glb_aff_sam_mt = glb_aff_sam_mt.annotate_rows(glb_aff_wild = hl.agg.sum(glb_aff_sam_mt.wild))
94-
glb_aff_sam_mt = glb_aff_sam_mt.annotate_rows(glb_aff_wild_c = hl.agg.collect(glb_aff_sam_mt.wild))
95-
# no call
96-
glb_aff_sam_mt = glb_aff_sam_mt.annotate_rows(glb_aff_ncl = hl.agg.sum(glb_aff_sam_mt.ncl))
97-
glb_aff_sam_mt = glb_aff_sam_mt.annotate_rows(glb_aff_ncl_c = hl.agg.collect(glb_aff_sam_mt.ncl))
98-
# variant
99-
glb_aff_sam_mt = glb_aff_sam_mt.annotate_rows(glb_aff_vrt = hl.agg.sum(glb_aff_sam_mt.vrt))
100-
glb_aff_sam_mt = glb_aff_sam_mt.annotate_rows(glb_aff_vrt_c = hl.agg.collect(glb_aff_sam_mt.vrt))
101-
# hom_var: contains identical alternate alleles
102-
glb_aff_sam_mt = glb_aff_sam_mt.annotate_rows(glb_aff_homv = hl.agg.sum(glb_aff_sam_mt.homv))
103-
glb_aff_sam_mt = glb_aff_sam_mt.annotate_rows(glb_aff_homv_c = hl.agg.collect(glb_aff_sam_mt.homv))
104-
# altaf: contains ALT allele frequency
105-
glb_aff_sam_mt = glb_aff_sam_mt.annotate_rows(glb_aff_altaf = (hl.agg.call_stats(glb_aff_sam_mt.GT, glb_aff_sam_mt.alleles).AF[1]))
106-
# listt2=[]
107-
# name2=f'{outfolder}/samp/glb_aff.csv'
108-
listt2=['glb_aff_r','glb_aff_wild','glb_aff_wild_c','glb_aff_ncl','glb_aff_ncl_c','glb_aff_vrt','glb_aff_vrt_c', 'glb_aff_homv', 'glb_aff_homv_c','glb_aff_altaf']
109-
df3=glb_aff_sam_mt.rows().select(*listt2).to_spark()
11086
udf_i = udf(lambda x: np.where(x)[0].tolist(), ArrayType(IntegerType()))
111-
df4=df3.select('glb_aff_r','glb_aff_wild','glb_aff_wild_c','glb_aff_ncl','glb_aff_ncl_c','glb_aff_vrt','glb_aff_vrt_c', 'glb_aff_homv', 'glb_aff_homv_c','glb_aff_altaf',udf_i('glb_aff_wild_c').alias('glb_aff_wild_c2'),udf_i('glb_aff_ncl_c').alias('glb_aff_ncl_c2'),udf_i('glb_aff_vrt_c').alias('glb_aff_vrt_c2'),udf_i('glb_aff_homv_c').alias('glb_aff_homv_c2'))
112-
del df3
11387
udf_2b = udf(lambda x,ref: [x[i] for i in ref])
114-
df5=df4.select('glb_aff_wild',udf_2b('glb_aff_r', 'glb_aff_wild_c2').alias('glb_aff_wild_s'),'glb_aff_ncl',udf_2b('glb_aff_r', 'glb_aff_ncl_c2').alias('glb_aff_ncl_s'),'glb_aff_vrt', udf_2b('glb_aff_r', 'glb_aff_vrt_c2').alias('glb_aff_vrt_s'),'glb_aff_homv', udf_2b('glb_aff_r', 'glb_aff_homv_c2').alias('glb_aff_homv_s'),'glb_aff_altaf')
11588
str_udf = F.udf(str_list, T.StringType())
116-
df6=df5.select('glb_aff_wild',str_udf('glb_aff_wild_s').alias('glb_aff_wild_samp'),'glb_aff_ncl',str_udf('glb_aff_ncl_s').alias('glb_aff_ncl_samp'),'glb_aff_vrt', str_udf('glb_aff_vrt_s').alias('glb_aff_vrt_samp'),'glb_aff_homv',str_udf('glb_aff_homv_s').alias('glb_aff_homv_samp'),'glb_aff_altaf')
117-
del df5
118-
name_glb_csv=f'{outfolder}/samp/glb_affcsv'
119-
df6.repartition(1).write.format("csv").mode('overwrite').option("sep","\t").option("header", "true").save(name_glb_csv)
120-
del df6
121-
cmd_glb_aff0=f'cd {outfolder}/samp/glb_affcsv ; find . -type f -name \""*.csv"\" -exec mv {{}} ../glb_aff_out \; ; rm -r ../glb_affcsv'
122-
os.system(cmd_glb_aff0)
123-
cmd_prune_glb=f'cd {outfolder}/samp;' + ' sed -i glb_aff_out -e \"s/\[\]/\[\\"\\"\]/g ; s/\[\'/\[\\"/g ; s/\'\]/\\"\]/g ; s/\'/\\"/g ; s/\[\]/\[\"\"\]/g \" '
124-
os.system(cmd_prune_glb)
125-
cmd_prune_glb_mv=f'mv {outfolder}/samp/glb_aff_out {outfolder}/glb_aff_out'
126-
os.system(cmd_prune_glb_mv)
89+
##### list of family
90+
#################################################### Global affected
91+
if len(glb_aff) > 0:
92+
glb_aff_sam = hl.literal(hl.set(glb_aff))
93+
glb_aff_sam_mt=mt.filter_cols(glb_aff_sam.contains(mt.s))
94+
# fam_aff_mt.count()
95+
## wildtype
96+
glb_aff_sam_mt = glb_aff_sam_mt.annotate_rows(glb_aff_r= glb_aff)
97+
glb_aff_sam_mt = glb_aff_sam_mt.annotate_rows(glb_aff_wild = hl.agg.sum(glb_aff_sam_mt.wild))
98+
glb_aff_sam_mt = glb_aff_sam_mt.annotate_rows(glb_aff_wild_c = hl.agg.collect(glb_aff_sam_mt.wild))
99+
# no call
100+
glb_aff_sam_mt = glb_aff_sam_mt.annotate_rows(glb_aff_ncl = hl.agg.sum(glb_aff_sam_mt.ncl))
101+
glb_aff_sam_mt = glb_aff_sam_mt.annotate_rows(glb_aff_ncl_c = hl.agg.collect(glb_aff_sam_mt.ncl))
102+
# variant
103+
glb_aff_sam_mt = glb_aff_sam_mt.annotate_rows(glb_aff_vrt = hl.agg.sum(glb_aff_sam_mt.vrt))
104+
glb_aff_sam_mt = glb_aff_sam_mt.annotate_rows(glb_aff_vrt_c = hl.agg.collect(glb_aff_sam_mt.vrt))
105+
# hom_var: contains identical alternate alleles
106+
glb_aff_sam_mt = glb_aff_sam_mt.annotate_rows(glb_aff_homv = hl.agg.sum(glb_aff_sam_mt.homv))
107+
glb_aff_sam_mt = glb_aff_sam_mt.annotate_rows(glb_aff_homv_c = hl.agg.collect(glb_aff_sam_mt.homv))
108+
# altaf: contains ALT allele frequency
109+
glb_aff_sam_mt = glb_aff_sam_mt.annotate_rows(glb_aff_altaf = (hl.agg.call_stats(glb_aff_sam_mt.GT, glb_aff_sam_mt.alleles).AF[1]))
110+
# listt2=[]
111+
# name2=f'{outfolder}/samp/glb_aff.csv'
112+
listt2=['glb_aff_r','glb_aff_wild','glb_aff_wild_c','glb_aff_ncl','glb_aff_ncl_c','glb_aff_vrt','glb_aff_vrt_c', 'glb_aff_homv', 'glb_aff_homv_c','glb_aff_altaf']
113+
df3=glb_aff_sam_mt.rows().select(*listt2).to_spark()
114+
udf_i = udf(lambda x: np.where(x)[0].tolist(), ArrayType(IntegerType()))
115+
df4=df3.select('glb_aff_r','glb_aff_wild','glb_aff_wild_c','glb_aff_ncl','glb_aff_ncl_c','glb_aff_vrt','glb_aff_vrt_c', 'glb_aff_homv', 'glb_aff_homv_c','glb_aff_altaf',udf_i('glb_aff_wild_c').alias('glb_aff_wild_c2'),udf_i('glb_aff_ncl_c').alias('glb_aff_ncl_c2'),udf_i('glb_aff_vrt_c').alias('glb_aff_vrt_c2'),udf_i('glb_aff_homv_c').alias('glb_aff_homv_c2'))
116+
del df3
117+
udf_2b = udf(lambda x,ref: [x[i] for i in ref])
118+
df5=df4.select('glb_aff_wild',udf_2b('glb_aff_r', 'glb_aff_wild_c2').alias('glb_aff_wild_s'),'glb_aff_ncl',udf_2b('glb_aff_r', 'glb_aff_ncl_c2').alias('glb_aff_ncl_s'),'glb_aff_vrt', udf_2b('glb_aff_r', 'glb_aff_vrt_c2').alias('glb_aff_vrt_s'),'glb_aff_homv', udf_2b('glb_aff_r', 'glb_aff_homv_c2').alias('glb_aff_homv_s'),'glb_aff_altaf')
119+
str_udf = F.udf(str_list, T.StringType())
120+
df6=df5.select('glb_aff_wild',str_udf('glb_aff_wild_s').alias('glb_aff_wild_samp'),'glb_aff_ncl',str_udf('glb_aff_ncl_s').alias('glb_aff_ncl_samp'),'glb_aff_vrt', str_udf('glb_aff_vrt_s').alias('glb_aff_vrt_samp'),'glb_aff_homv',str_udf('glb_aff_homv_s').alias('glb_aff_homv_samp'),'glb_aff_altaf')
121+
del df5
122+
name_glb_csv=f'{outfolder}/samp/glb_affcsv'
123+
df6.repartition(1).write.format("csv").mode('overwrite').option("sep","\t").option("header", "true").save(name_glb_csv)
124+
del df6
125+
cmd_glb_aff0=f'cd {outfolder}/samp/glb_affcsv ; find . -type f -name \""*.csv"\" -exec mv {{}} ../glb_aff_out \; ; rm -r ../glb_affcsv'
126+
os.system(cmd_glb_aff0)
127+
cmd_prune_glb=f'cd {outfolder}/samp;' + ' sed -i glb_aff_out -e \"s/\[\]/\[\\"\\"\]/g ; s/\[\'/\[\\"/g ; s/\'\]/\\"\]/g ; s/\'/\\"/g ; s/\[\]/\[\"\"\]/g \" '
128+
os.system(cmd_prune_glb)
129+
cmd_prune_glb_mv=f'mv {outfolder}/samp/glb_aff_out {outfolder}/glb_aff_out'
130+
os.system(cmd_prune_glb_mv)
127131
# list_generated.append('glb_aff.csv')
128132
##################################################### Global unaffected
129133
##### generate global non-affected
130-
glb_naf_sam = hl.literal(hl.set(glb_naf))
131-
glb_naf_sam_mt=mt.filter_cols(glb_naf_sam.contains(mt.s))
132-
# glb_naf_sam_mt.count()
133-
## wildtype
134-
glb_naf_sam_mt = glb_naf_sam_mt.annotate_rows(glb_naf_r= glb_naf)
135-
glb_naf_sam_mt = glb_naf_sam_mt.annotate_rows(glb_naf_wild = hl.agg.sum(glb_naf_sam_mt.wild))
136-
glb_naf_sam_mt = glb_naf_sam_mt.annotate_rows(glb_naf_wild_c = hl.agg.collect(glb_naf_sam_mt.wild))
137-
# no call
138-
glb_naf_sam_mt = glb_naf_sam_mt.annotate_rows(glb_naf_ncl = hl.agg.sum(glb_naf_sam_mt.ncl))
139-
glb_naf_sam_mt = glb_naf_sam_mt.annotate_rows(glb_naf_ncl_c = hl.agg.collect(glb_naf_sam_mt.ncl))
140-
# variant
141-
glb_naf_sam_mt = glb_naf_sam_mt.annotate_rows(glb_naf_vrt = hl.agg.sum(glb_naf_sam_mt.vrt))
142-
glb_naf_sam_mt = glb_naf_sam_mt.annotate_rows(glb_naf_vrt_c = hl.agg.collect(glb_naf_sam_mt.vrt))
143-
# hom_var: contains identical alternate alleles
144-
glb_naf_sam_mt = glb_naf_sam_mt.annotate_rows(glb_naf_homv = hl.agg.sum(glb_naf_sam_mt.homv))
145-
glb_naf_sam_mt = glb_naf_sam_mt.annotate_rows(glb_naf_homv_c = hl.agg.collect(glb_naf_sam_mt.homv))
146-
# altaf: contains ALT allele frequency
147-
glb_naf_sam_mt = glb_naf_sam_mt.annotate_rows(glb_naf_altaf = (hl.agg.call_stats(glb_naf_sam_mt.GT, glb_naf_sam_mt.alleles).AF[1]))
148-
# listt2=[]
149-
# name2=f'{outfolder}/samp/glb_naf.csv'
150-
listt2=['glb_naf_r','glb_naf_wild','glb_naf_wild_c','glb_naf_ncl','glb_naf_ncl_c','glb_naf_vrt','glb_naf_vrt_c', 'glb_naf_homv', 'glb_naf_homv_c','glb_naf_altaf']
151-
# spark = SparkSession.builder.appName("myApp").getOrCreate()
152-
df3=glb_naf_sam_mt.rows().select(*listt2).to_spark()
153-
# udf_i = udf(lambda x: np.where(x)[0].tolist(), ArrayType(IntegerType()))
154-
df4=df3.select('glb_naf_r','glb_naf_wild','glb_naf_wild_c','glb_naf_ncl','glb_naf_ncl_c','glb_naf_vrt','glb_naf_vrt_c', 'glb_naf_homv', 'glb_naf_homv_c','glb_naf_altaf',udf_i('glb_naf_wild_c').alias('glb_naf_wild_c2'),udf_i('glb_naf_ncl_c').alias('glb_naf_ncl_c2'),udf_i('glb_naf_vrt_c').alias('glb_naf_vrt_c2'),udf_i('glb_naf_homv_c').alias('glb_naf_homv_c2'))
155-
del df3
156-
# udf_2b = udf(lambda x,ref: [x[i] for i in ref])
157-
df5=df4.select('glb_naf_wild',udf_2b('glb_naf_r', 'glb_naf_wild_c2').alias('glb_naf_wild_s'),'glb_naf_ncl',udf_2b('glb_naf_r', 'glb_naf_ncl_c2').alias('glb_naf_ncl_s'),'glb_naf_vrt', udf_2b('glb_naf_r', 'glb_naf_vrt_c2').alias('glb_naf_vrt_s'),'glb_naf_homv', udf_2b('glb_naf_r', 'glb_naf_homv_c2').alias('glb_naf_homv_s'),'glb_naf_altaf')
158-
# str_udf = F.udf(str_list, T.StringType())
159-
df6=df5.select('glb_naf_wild',str_udf('glb_naf_wild_s').alias('glb_naf_wild_samp'),'glb_naf_ncl',str_udf('glb_naf_ncl_s').alias('glb_naf_ncl_samp'),'glb_naf_vrt', str_udf('glb_naf_vrt_s').alias('glb_naf_vrt_samp'),'glb_naf_homv',str_udf('glb_naf_homv_s').alias('glb_naf_homv_samp'),'glb_naf_altaf')
160-
del df5
161-
name_glb_csv=f'{outfolder}/samp/glb_nafcsv'
162-
df6.repartition(1).write.format("csv").mode('overwrite').option("sep","\t").option("header", "true").save(name_glb_csv)
163-
del df6
164-
cmd_glb_aff0=f'cd {outfolder}/samp/glb_nafcsv ; find . -type f -name \""*.csv"\" -exec mv {{}} ../glb_naf_out \; ; rm -r ../glb_nafcsv'
165-
os.system(cmd_glb_aff0)
166-
cmd_prune_glb=f'cd {outfolder}/samp;' + ' sed -i glb_naf_out -e \"s/\[\]/\[\\"\\"\]/g ; s/\[\'/\[\\"/g ; s/\'\]/\\"\]/g ; s/\'/\\"/g ; s/\[\]/\[\"\"\]/g \" '
167-
os.system(cmd_prune_glb)
168-
cmd_prune_glb_mv=f'mv {outfolder}/samp/glb_naf_out {outfolder}/glb_naf_out'
169-
os.system(cmd_prune_glb_mv)
134+
if len(glb_naf) > 0:
135+
glb_naf_sam = hl.literal(hl.set(glb_naf))
136+
glb_naf_sam_mt=mt.filter_cols(glb_naf_sam.contains(mt.s))
137+
# glb_naf_sam_mt.count()
138+
## wildtype
139+
glb_naf_sam_mt = glb_naf_sam_mt.annotate_rows(glb_naf_r= glb_naf)
140+
glb_naf_sam_mt = glb_naf_sam_mt.annotate_rows(glb_naf_wild = hl.agg.sum(glb_naf_sam_mt.wild))
141+
glb_naf_sam_mt = glb_naf_sam_mt.annotate_rows(glb_naf_wild_c = hl.agg.collect(glb_naf_sam_mt.wild))
142+
# no call
143+
glb_naf_sam_mt = glb_naf_sam_mt.annotate_rows(glb_naf_ncl = hl.agg.sum(glb_naf_sam_mt.ncl))
144+
glb_naf_sam_mt = glb_naf_sam_mt.annotate_rows(glb_naf_ncl_c = hl.agg.collect(glb_naf_sam_mt.ncl))
145+
# variant
146+
glb_naf_sam_mt = glb_naf_sam_mt.annotate_rows(glb_naf_vrt = hl.agg.sum(glb_naf_sam_mt.vrt))
147+
glb_naf_sam_mt = glb_naf_sam_mt.annotate_rows(glb_naf_vrt_c = hl.agg.collect(glb_naf_sam_mt.vrt))
148+
# hom_var: contains identical alternate alleles
149+
glb_naf_sam_mt = glb_naf_sam_mt.annotate_rows(glb_naf_homv = hl.agg.sum(glb_naf_sam_mt.homv))
150+
glb_naf_sam_mt = glb_naf_sam_mt.annotate_rows(glb_naf_homv_c = hl.agg.collect(glb_naf_sam_mt.homv))
151+
# altaf: contains ALT allele frequency
152+
glb_naf_sam_mt = glb_naf_sam_mt.annotate_rows(glb_naf_altaf = (hl.agg.call_stats(glb_naf_sam_mt.GT, glb_naf_sam_mt.alleles).AF[1]))
153+
# listt2=[]
154+
# name2=f'{outfolder}/samp/glb_naf.csv'
155+
listt2=['glb_naf_r','glb_naf_wild','glb_naf_wild_c','glb_naf_ncl','glb_naf_ncl_c','glb_naf_vrt','glb_naf_vrt_c', 'glb_naf_homv', 'glb_naf_homv_c','glb_naf_altaf']
156+
# spark = SparkSession.builder.appName("myApp").getOrCreate()
157+
df3=glb_naf_sam_mt.rows().select(*listt2).to_spark()
158+
# udf_i = udf(lambda x: np.where(x)[0].tolist(), ArrayType(IntegerType()))
159+
df4=df3.select('glb_naf_r','glb_naf_wild','glb_naf_wild_c','glb_naf_ncl','glb_naf_ncl_c','glb_naf_vrt','glb_naf_vrt_c', 'glb_naf_homv', 'glb_naf_homv_c','glb_naf_altaf',udf_i('glb_naf_wild_c').alias('glb_naf_wild_c2'),udf_i('glb_naf_ncl_c').alias('glb_naf_ncl_c2'),udf_i('glb_naf_vrt_c').alias('glb_naf_vrt_c2'),udf_i('glb_naf_homv_c').alias('glb_naf_homv_c2'))
160+
del df3
161+
# udf_2b = udf(lambda x,ref: [x[i] for i in ref])
162+
df5=df4.select('glb_naf_wild',udf_2b('glb_naf_r', 'glb_naf_wild_c2').alias('glb_naf_wild_s'),'glb_naf_ncl',udf_2b('glb_naf_r', 'glb_naf_ncl_c2').alias('glb_naf_ncl_s'),'glb_naf_vrt', udf_2b('glb_naf_r', 'glb_naf_vrt_c2').alias('glb_naf_vrt_s'),'glb_naf_homv', udf_2b('glb_naf_r', 'glb_naf_homv_c2').alias('glb_naf_homv_s'),'glb_naf_altaf')
163+
# str_udf = F.udf(str_list, T.StringType())
164+
df6=df5.select('glb_naf_wild',str_udf('glb_naf_wild_s').alias('glb_naf_wild_samp'),'glb_naf_ncl',str_udf('glb_naf_ncl_s').alias('glb_naf_ncl_samp'),'glb_naf_vrt', str_udf('glb_naf_vrt_s').alias('glb_naf_vrt_samp'),'glb_naf_homv',str_udf('glb_naf_homv_s').alias('glb_naf_homv_samp'),'glb_naf_altaf')
165+
del df5
166+
name_glb_csv=f'{outfolder}/samp/glb_nafcsv'
167+
df6.repartition(1).write.format("csv").mode('overwrite').option("sep","\t").option("header", "true").save(name_glb_csv)
168+
del df6
169+
cmd_glb_aff0=f'cd {outfolder}/samp/glb_nafcsv ; find . -type f -name \""*.csv"\" -exec mv {{}} ../glb_naf_out \; ; rm -r ../glb_nafcsv'
170+
os.system(cmd_glb_aff0)
171+
cmd_prune_glb=f'cd {outfolder}/samp;' + ' sed -i glb_naf_out -e \"s/\[\]/\[\\"\\"\]/g ; s/\[\'/\[\\"/g ; s/\'\]/\\"\]/g ; s/\'/\\"/g ; s/\[\]/\[\"\"\]/g \" '
172+
os.system(cmd_prune_glb)
173+
cmd_prune_glb_mv=f'mv {outfolder}/samp/glb_naf_out {outfolder}/glb_naf_out'
174+
os.system(cmd_prune_glb_mv)
170175
# mt.rows().export(f'{outfolder}/out_locus0',delimiter='\t')
171176
# cmd_glb_csq=f'cut -f 1-2 {outfolder}/out_locus0 > {outfolder}/out_locus; rm {outfolder}/out_locus0'
172177
# os.system(cmd_glb_csq)
@@ -304,7 +309,6 @@ def segrun_family_wise_whole(mt, ped, outfolder, hl, csqlabel, affecteds_only, f
304309
cmd_paste = 'paste $(ls -rt ' + f'{outfolder}/out_csq_*) > {outfolder}/out_csq; rm {outfolder}/out_csq_*'
305310
os.system(cmd_paste)
306311
timekeeping(step, start_time)
307-
308312
# processing the rest of the INFO field data
309313
# pseudo: vcf INFO fields are not natively exportable as separate columns to tsv;
310314
# therefore the process to do so is:

segpy/segpy/segrun/segrun_family_wise_whole_multiple.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -63,7 +63,10 @@ def export_counts(mt, prefix, outfile):
6363

6464
# generate wild/ncl/vrt/homv counts for one matrixTable for multiple family
6565
def generate_counts_m(mt, fam, sample_list,ped):
66-
fam_aff=[ x for x in ped.loc[ped.loc[:,'familyid']==fam,'individualid']]
66+
# fam_aff=[ x for x in ped.loc[ped.loc[:,'familyid']==fam,'individualid']]
67+
fam_aff=sample_list
68+
if not sample_list:
69+
fam_aff=[ x for x in ped.loc[ped.loc[:,'familyid']==fam,'individualid']]
6770
aff_sam = hl.literal(hl.set(fam_aff))
6871
aff_sam2=mt.filter_cols(aff_sam.contains(mt.s))
6972
if len(sample_list) > 0:

segpy/setup.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,7 @@
1212
# Needed for dependencies
1313
install_requires=['numpy', 'pandas', 'hail'],
1414
# *strongly* suggested for sharing
15-
version='0.0.7',
15+
version='0.0.8',
1616
license='MIT',
1717
description='Segpy: A pipeline for segregation analysis',
1818
# We will also need a readme eventually (there will be a warning)

test/.DS_Store

0 Bytes
Binary file not shown.

test/module_python.txt

Lines changed: 111 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,111 @@
1+
Package Version
2+
------------------------ ----------------
3+
aiodns 2.0.0
4+
aiohttp 3.9.1
5+
aiohttp-session 2.12.0
6+
aiosignal 1.3.1
7+
anyio 4.2.0
8+
async-timeout 4.0.3
9+
asyncinit 0.2.4
10+
attrs 23.2.0
11+
avro 1.11.3
12+
azure-common 1.1.28
13+
azure-core 1.29.6
14+
azure-identity 1.15.0
15+
azure-mgmt-core 1.4.0
16+
azure-mgmt-storage 20.1.0
17+
azure-storage-blob 12.19.0
18+
bokeh 1.4.0
19+
boto3 1.34.11
20+
botocore 1.34.11
21+
cachetools 5.3.2
22+
certifi 2023.11.17
23+
cffi 1.16.0
24+
chardet 4.0.0
25+
charset-normalizer 3.3.2
26+
click 8.1.7
27+
commonmark 0.9.1
28+
contourpy 1.2.0
29+
cryptography 41.0.7
30+
dbus-python 1.2.18
31+
decorator 4.4.2
32+
Deprecated 1.2.14
33+
dill 0.3.7
34+
exceptiongroup 1.2.0
35+
frozenlist 1.4.1
36+
google-api-core 2.15.0
37+
google-auth 2.14.1
38+
google-auth-oauthlib 0.8.0
39+
google-cloud-core 2.4.1
40+
google-cloud-storage 2.11.0
41+
google-crc32c 1.5.0
42+
google-resumable-media 2.7.0
43+
googleapis-common-protos 1.62.0
44+
hail 0.2.109
45+
humanize 1.1.0
46+
hurry.filesize 0.9
47+
idna 3.6
48+
isodate 0.6.1
49+
janus 1.0.0
50+
Jinja2 3.0.3
51+
jmespath 1.0.1
52+
jproperties 2.1.1
53+
MarkupSafe 2.1.3
54+
msal 1.26.0
55+
msal-extensions 1.1.0
56+
msrest 0.7.1
57+
multidict 6.0.4
58+
nest-asyncio 1.5.8
59+
numpy 1.25.2
60+
oauthlib 3.2.2
61+
orjson 3.9.10
62+
packaging 23.2
63+
pandas 1.5.3
64+
parsimonious 0.8.1
65+
Pillow 10.1.0
66+
pip 22.0.2
67+
plotly 5.10.0
68+
portalocker 2.8.2
69+
protobuf 3.20.2
70+
py4j 0.10.9
71+
pyasn1 0.5.1
72+
pyasn1-modules 0.3.0
73+
pycares 4.4.0
74+
pycparser 2.21
75+
Pygments 2.17.2
76+
PyGObject 3.42.1
77+
PyJWT 2.8.0
78+
pyspark 3.1.3
79+
python-apt 2.4.0+ubuntu2
80+
python-dateutil 2.8.2
81+
python-debian 0.1.43+ubuntu1.1
82+
python-json-logger 2.0.7
83+
pytz 2023.3.post1
84+
PyYAML 6.0.1
85+
regex 2023.12.25
86+
requests 2.31.0
87+
requests-oauthlib 1.3.1
88+
rich 12.6.0
89+
rsa 4.9
90+
s3transfer 0.10.0
91+
scipy 1.9.3
92+
segpy 0.0.8
93+
session-info 1.0.0
94+
setuptools 59.6.0
95+
six 1.16.0
96+
sniffio 1.3.0
97+
sortedcontainers 2.4.0
98+
stdlib-list 0.10.0
99+
tabulate 0.9.0
100+
tenacity 8.2.3
101+
tornado 6.4
102+
typer 0.9.0
103+
typing_extensions 4.9.0
104+
tzdata 2023.4
105+
urllib3 2.0.7
106+
uvloop 0.19.0
107+
wheel 0.37.1
108+
wrapt 1.16.0
109+
xyzservices 2023.10.1
110+
yarl 1.9.4
111+

0 commit comments

Comments
 (0)