Skip to content

Commit 7f58935

Browse files
committed
Fix liver proteomics script
1 parent 03d12e0 commit 7f58935

File tree

1 file changed

+6
-1
lines changed

1 file changed

+6
-1
lines changed

coderbuild/liver/02-omics-liver.py

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -281,7 +281,7 @@ def map_proteomics(proteomics_data, improve_id_data, entrez_data):
281281

282282
# read in data
283283
if isinstance(proteomics_data, pd.DataFrame) == False:
284-
proteomics_data = pd.read_csv(proteomics_data)
284+
proteomics_data = pd.read_csv(proteomics_data, dtype=str, low_memory=False)
285285

286286
if isinstance(improve_id_data, pd.DataFrame) == False:
287287
improve_id_data = pd.read_csv(improve_id_data)
@@ -299,8 +299,13 @@ def map_proteomics(proteomics_data, improve_id_data, entrez_data):
299299
long_prot_df = long_prot_df.rename(columns = {0:'sample_name', 'value':'proteomics'})
300300

301301

302+
# Ensure both columns are string types for merging
303+
long_prot_df['gene_symbol'] = long_prot_df['gene_symbol'].astype(str)
304+
entrez_data['other_id'] = entrez_data['other_id'].astype(str)
305+
302306
# map gene names to entrez id's
303307
mapped_proteomics_df = pd.merge(long_prot_df, entrez_data[['other_id','entrez_id']].drop_duplicates(), how = 'inner', left_on= "gene_symbol", right_on= "other_id")
308+
304309
mapped_proteomics_df = mapped_proteomics_df.dropna(subset=['entrez_id'])
305310

306311
# mapping improve sample id'samples_df

0 commit comments

Comments
 (0)