@@ -154,7 +154,9 @@ def read_xset_matrix(fn_matrix, first_sample, separator="\t", mapping={"mz": "mz
154
154
return pd .concat ([df_peaklist , df_matrix ], axis = 1 )
155
155
156
156
157
- def combine_peaklist_matrix (fn_peaklist , fn_matrix , separator = "\t " , mapping = {"name" : "name" , "mz" : "mz" , "rt" : "rt" }, merge_on = "name" , samples_in_columns = True ):
157
+ def combine_peaklist_matrix (fn_peaklist , fn_matrix , separator = "\t " , median_intensity = True ,
158
+ mapping = {"name" : "name" , "mz" : "mz" , "rt" : "rt" , "intensity" : "intensity" },
159
+ merge_on = "name" , samples_in_columns = True ):
158
160
if "mz" not in mapping and "rt" not in mapping and "name" not in mapping :
159
161
raise ValueError ("Incorrect column mapping: provide column names for mz, and name" )
160
162
@@ -176,7 +178,12 @@ def combine_peaklist_matrix(fn_peaklist, fn_matrix, separator="\t", mapping={"na
176
178
df_peaklist .columns = ["name" , "mz" , "rt" ]
177
179
178
180
df_matrix = df_matrix .rename (columns = {mapping ["name" ]: 'name' })
179
- df_peaklist ["intensity" ] = pd .Series (df_matrix .median (axis = 1 , skipna = True ), index = df_matrix .index )
181
+
182
+ if mapping ["intensity" ] not in df_peaklist .columns :
183
+ if median_intensity :
184
+ df_peaklist ["intensity" ] = pd .Series (df_matrix .median (axis = 1 , skipna = True ), index = df_matrix .index )
185
+ else :
186
+ df_peaklist ["intensity" ] = pd .Series (df_matrix .mean (axis = 1 , skipna = True ), index = df_matrix .index )
180
187
181
188
if len (df_peaklist [mapping ["name" ]].unique ()) != len (df_peaklist [mapping ["name" ]]):
182
189
raise ValueError ("Peaklist: Values column '{}' are not unique" .format (mapping ["name" ]))
@@ -187,7 +194,8 @@ def combine_peaklist_matrix(fn_peaklist, fn_matrix, separator="\t", mapping={"na
187
194
188
195
189
196
190
- def read_peaklist (fn_peaklist , separator = "\t " , mapping = {"name" : "name" , "mz" : "mz" , "rt" : "rt" , "intensity" : "intensity" }):
197
+ def read_peaklist (fn_peaklist , separator = "\t " ,
198
+ mapping = {"name" : "name" , "mz" : "mz" , "rt" : "rt" , "intensity" : "intensity" }):
191
199
192
200
df_peaklist = pd .read_csv (fn_peaklist , header = 0 , sep = separator , dtype = {"name" : str }, float_precision = "round_trip" )
193
201
if mapping ["mz" ] not in df_peaklist .columns .values or mapping ["intensity" ] not in df_peaklist .columns .values :
@@ -200,9 +208,12 @@ def read_peaklist(fn_peaklist, separator="\t", mapping={"name": "name", "mz": "m
200
208
df_peaklist .columns = ["mz" , "intensity" ]
201
209
df_peaklist .insert (0 , "name" , [str (x ).replace ("." ,"_" ) for x in df_peaklist [mapping ["mz" ]]])
202
210
df_peaklist ["mz" ] = df_peaklist ["mz" ].astype (float )
211
+ df_peaklist ["intensity" ] = df_peaklist ["intensity" ].astype (float )
203
212
else :
204
213
df_peaklist = df_peaklist [[mapping ["name" ], mapping ["mz" ], mapping ["intensity" ]]]
205
214
df_peaklist .columns = ["name" , "mz" , "intensity" ]
215
+ df_peaklist ["mz" ] = df_peaklist ["mz" ].astype (float )
216
+ df_peaklist ["intensity" ] = df_peaklist ["intensity" ].astype (float )
206
217
df_peaklist .insert (2 , "rt" , 0.0 )
207
218
elif "rt" in mapping :
208
219
if mapping ["name" ] in df_peaklist .columns .values :
@@ -223,4 +234,8 @@ def read_peaklist(fn_peaklist, separator="\t", mapping={"name": "name", "mz": "m
223
234
df_peaklist = df_peaklist [[mapping ["name" ], mapping ["mz" ], mapping ["rt" ], mapping ["intensity" ]]]
224
235
df_peaklist .columns = ["name" , "mz" , "rt" , "intensity" ]
225
236
237
+ df_peaklist ["mz" ] = df_peaklist ["mz" ].astype (float )
238
+ df_peaklist ["rt" ] = df_peaklist ["rt" ].astype (float )
239
+ df_peaklist ["intensity" ] = df_peaklist ["intensity" ].astype (float )
240
+
226
241
return df_peaklist
0 commit comments