@@ -57,7 +57,8 @@ def load_gtf(
5757 gtf = gtf [gtf ["Feature" ] == "gene" ]
5858
5959 # Find common genes between GTF and marker scores
60- common_gene = np .intersect1d (mk_score .index , gtf .gene_name )
60+ # common_gene = np.intersect1d(mk_score.index, gtf.gene_name)
61+ common_gene = list (set (mk_score .index ) & set (gtf .gene_name ))
6162 logger .info (f"Found { len (common_gene )} common genes between GTF and marker scores" )
6263
6364 # Filter GTF and marker scores to common genes
@@ -69,6 +70,9 @@ def load_gtf(
6970
7071 # Process the GTF (open window around gene coordinates)
7172 gtf_bed = gtf [["Chromosome" , "Start" , "End" , "gene_name" , "Strand" ]].copy ()
73+ gtf_bed ["Chromosome" ] = gtf_bed ["Chromosome" ].apply (
74+ lambda x : f"chr{ x } " if not str (x ).startswith ("chr" ) else x
75+ )
7276 gtf_bed .loc [:, "TSS" ] = gtf_bed ["Start" ]
7377 gtf_bed .loc [:, "TED" ] = gtf_bed ["End" ]
7478
@@ -128,7 +132,7 @@ def load_bim(bfile_root: str, chrom: int) -> tuple[pd.DataFrame, pr.PyRanges]:
128132 - bim_pr is a PyRanges object with BIM data
129133 """
130134 bim_file = f"{ bfile_root } .{ chrom } .bim"
131- logger .debug (f"Loading BIM file: { bim_file } " )
135+ logger .info (f"Loading BIM file: { bim_file } " )
132136
133137 bim = pd .read_csv (bim_file , sep = "\t " , header = None )
134138 bim .columns = ["CHR" , "SNP" , "CM" , "BP" , "A1" , "A2" ]
@@ -311,6 +315,8 @@ def get_ldscore(
311315 bfile_chr_prefix = f"{ bfile_root } .{ chrom } " , keep_snps = keep_snps_index
312316 )
313317
318+ annot_matrix = annot_matrix [geno_array .kept_snps , :]
319+
314320 # Configure LD window based on specified unit
315321 if ld_unit == "SNP" :
316322 max_dist = ld_wind
0 commit comments