@@ -416,53 +416,51 @@ def run_related_prune(self, related_cutoff=0.0884, duplicated_cutoff=0.354, prun
416416 if related_cutoff < 0 or related_cutoff > 1 or duplicated_cutoff < 0 or duplicated_cutoff > 1 :
417417 raise ValueError ("related_cutoff and duplicated_cutoff should be between 0 and 1." )
418418
419- grm1 = f"{ out_path } _total_grm"
420- grm2 = f"{ out_path } _related_grm"
421- grm3 = f"{ out_path } _duplicated_grm"
419+ king1 = f"{ out_path } _related_king"
420+ king2 = f"{ out_path } _duplicated_king"
422421
423422 related_pairs = f"{ out_path } _pairs"
424423 related_out = f"{ related_pairs } .related"
425424 related_pruned_out = f"{ out_path } .pruned"
426425
427426 # create pfiles
428- king_cmd1 = f'{ plink2_exec } --pfile { geno_path } --hwe 0.0001 --mac 2 --make-pgen psam-cols=fid,parents,sex,pheno1,phenos --out { grm1 } '
429427 # create table of related pairs
430- king_cmd2 = f'{ plink2_exec } --pfile { grm1 } --make-king-table --make-king triangle bin --king-table-filter { related_cutoff } --out { related_pairs } '
428+ king_cmd1 = f'{ plink2_exec } --pfile { geno_path } --make-king-table --make-king triangle bin --king-table-filter { related_cutoff } --out { related_pairs } '
431429 # see if any samples are related (includes duplicates)
432- king_cmd3 = f'{ plink2_exec } --pfile { grm1 } --king-cutoff { related_pairs } { related_cutoff } --out { grm2 } '
430+ king_cmd2 = f'{ plink2_exec } --pfile { geno_path } --king-cutoff { related_pairs } { related_cutoff } --out { king1 } '
433431 # see if any samples are duplicated (grm cutoff >= 0.354)
434- king_cmd4 = f'{ plink2_exec } --pfile { grm1 } --king-cutoff { related_pairs } { duplicated_cutoff } --out { grm3 } '
432+ king_cmd3 = f'{ plink2_exec } --pfile { geno_path } --king-cutoff { related_pairs } { duplicated_cutoff } --out { king2 } '
435433
436- cmds = [king_cmd1 , king_cmd2 , king_cmd3 , king_cmd4 ]
434+ cmds = [king_cmd1 , king_cmd2 , king_cmd3 ]
437435 for cmd in cmds :
438436 shell_do (cmd )
439437
440- listOfFiles = [f'{ grm1 } .log' , f' { related_pairs } .log' , f'{ grm2 } .log' , f'{ grm3 } .log' ]
438+ listOfFiles = [f'{ related_pairs } .log' , f'{ king1 } .log' , f'{ king2 } .log' ]
441439 concat_logs (step , out_path , listOfFiles )
442440
443- if os .path .isfile (f'{ related_pairs } .kin0' ) and os .path .isfile (f'{ grm2 } .king.cutoff.out.id' ) and os .path .isfile (f'{ grm3 } .king.cutoff.out.id' ):
441+ if os .path .isfile (f'{ related_pairs } .kin0' ) and os .path .isfile (f'{ king1 } .king.cutoff.out.id' ) and os .path .isfile (f'{ king2 } .king.cutoff.out.id' ):
444442
445443 # create .related related pair sample files
446444 kinship = pd .read_csv (f'{ related_pairs } .kin0' , sep = '\s+' )
447445 kinship ['REL' ] = pd .cut (x = kinship ['KINSHIP' ], bins = [- np .inf , 0.0884 , 0.177 , 0.354 , np .inf ], labels = ['unrel' , 'second_deg' , 'first_deg' , 'duplicate' ])
448446 kinship .to_csv (f'{ related_pairs } .related' , index = False )
449447
450448 # create .related and .duplicated single sample files
451- shutil .copy (f'{ grm2 } .king.cutoff.out.id' ,f'{ grm2 } .related' )
452- related_count = sum (1 for line in open (f'{ grm2 } .related' ))
449+ shutil .copy (f'{ king1 } .king.cutoff.out.id' ,f'{ king1 } .related' )
450+ related_count = sum (1 for line in open (f'{ king1 } .related' ))
453451
454- shutil .copy (f'{ grm3 } .king.cutoff.out.id' ,f'{ grm3 } .duplicated' )
455- duplicated_count = sum (1 for line in open (f'{ grm3 } .duplicated' ))
452+ shutil .copy (f'{ king2 } .king.cutoff.out.id' ,f'{ king2 } .duplicated' )
453+ duplicated_count = sum (1 for line in open (f'{ king2 } .duplicated' ))
456454
457455 related_count = related_count - duplicated_count
458- duplicated = pd .read_csv (f'{ grm3 } .duplicated' , sep = '\s+' )
456+ duplicated = pd .read_csv (f'{ king2 } .duplicated' , sep = '\s+' )
459457
460458 # concat duplicated sample ids to related sample ids, drop_duplicates(keep='last) because all duplicated would also be considered related
461459 if prune_related and prune_duplicated :
462- plink_cmd1 = f'{ plink2_exec } --pfile { grm1 } --remove { grm2 } .king.cutoff.out.id --make-pgen psam-cols=fid,parents,sex,pheno1,phenos --out { out_path } '
460+ plink_cmd1 = f'{ plink2_exec } --pfile { geno_path } --remove { king1 } .king.cutoff.out.id --make-pgen psam-cols=fid,parents,sex,pheno1,phenos --out { out_path } '
463461 shell_do (plink_cmd1 )
464462
465- related = pd .read_csv (f'{ grm2 } .related' , sep = '\s+' )
463+ related = pd .read_csv (f'{ king1 } .related' , sep = '\s+' )
466464 grm_pruned = pd .concat ([related , duplicated ], ignore_index = True )
467465
468466 if '#FID' in grm_pruned :
@@ -475,7 +473,7 @@ def run_related_prune(self, related_cutoff=0.0884, duplicated_cutoff=0.354, prun
475473 process_complete = True
476474
477475 if prune_duplicated and not prune_related :
478- plink_cmd1 = f'{ plink2_exec } --pfile { grm1 } --remove { grm3 } .king.cutoff.out.id --make-pgen psam-cols=fid,parents,sex,pheno1,phenos --out { out_path } '
476+ plink_cmd1 = f'{ plink2_exec } --pfile { geno_path } --remove { king2 } .king.cutoff.out.id --make-pgen psam-cols=fid,parents,sex,pheno1,phenos --out { out_path } '
479477 shell_do (plink_cmd1 )
480478
481479 grm_pruned = duplicated
@@ -513,15 +511,12 @@ def run_related_prune(self, related_cutoff=0.0884, duplicated_cutoff=0.354, prun
513511 concat_logs (step , out_path , listOfFiles )
514512
515513 # remove intermediate files
516- os .remove (f'{ grm1 } .pgen' )
517- os .remove (f'{ grm1 } .psam' )
518- os .remove (f'{ grm1 } .pvar' )
519- os .remove (f'{ grm2 } .king.cutoff.in.id' )
520- os .remove (f'{ grm2 } .king.cutoff.out.id' )
521- os .remove (f'{ grm2 } .related' )
522- os .remove (f'{ grm3 } .duplicated' )
523- os .remove (f'{ grm3 } .king.cutoff.in.id' )
524- os .remove (f'{ grm3 } .king.cutoff.out.id' )
514+ os .remove (f'{ king1 } .king.cutoff.in.id' )
515+ os .remove (f'{ king1 } .king.cutoff.out.id' )
516+ os .remove (f'{ king1 } .related' )
517+ os .remove (f'{ king2 } .duplicated' )
518+ os .remove (f'{ king2 } .king.cutoff.in.id' )
519+ os .remove (f'{ king2 } .king.cutoff.out.id' )
525520 os .remove (f'{ related_pairs } .king.bin' )
526521 os .remove (f'{ related_pairs } .king.id' )
527522 os .remove (f'{ related_pairs } .kin0' )
@@ -545,7 +540,7 @@ def run_related_prune(self, related_cutoff=0.0884, duplicated_cutoff=0.354, prun
545540 outfiles_dict = {
546541 'pruned_samples' : 'Related Pruning Failed' ,
547542 'related_samples' : None ,
548- 'plink_out' : [grm1 , grm2 , grm3 ]
543+ 'plink_out' : [king1 , king2 ]
549544 }
550545
551546 metrics_dict = {
0 commit comments