@@ -31,89 +31,89 @@ def convert_to_complex(s):
3131
3232
def convert_complex_value(x):
    """Normalize one cell value to a float or complex number where possible.

    Strings have surrounding parentheses stripped and are parsed with
    ``complex()``; strings that fail to parse are returned unchanged.
    A complex value (parsed or passed in) whose imaginary part is smaller
    than 1e-10 in absolute value collapses to a plain ``float`` holding its
    real part. Any other input is returned as-is.
    """
    if isinstance(x, str):
        try:
            candidate = complex(x.strip('()'))
        except ValueError:
            # Not a numeric literal: hand the original text back untouched.
            return x
    elif isinstance(x, complex):
        candidate = x
    else:
        return x

    # A negligible imaginary part means the value is effectively real.
    effectively_real = abs(candidate.imag) < 1e-10
    return float(candidate.real) if effectively_real else candidate
5050
5151
def unify_complex_float_types_cell(df):
    """Run ``convert_complex_value`` over every cell of *df*, one at a time.

    Cells are visited column by column and written back in place through
    ``df.at``; the same DataFrame object is returned.
    """
    # Column-major walk: all rows of the first column, then the next column.
    cell_positions = (
        (row_label, column)
        for column in df.columns
        for row_label in df.index
    )
    for row_label, column in cell_positions:
        current = df.at[row_label, column]
        df.at[row_label, column] = convert_complex_value(current)
    return df
5858
5959
def unify_complex_float_types_row(df):
    """Run ``convert_complex_value`` over *df* one row at a time.

    Each row is fetched with ``df.loc``, converted element-wise via
    ``apply``, and assigned back in place; the same DataFrame object is
    returned.
    """
    for row_label in df.index:
        converted_row = df.loc[row_label].apply(convert_complex_value)
        df.loc[row_label] = converted_row
    return df
6565
6666
def unify_complex_float_types(df):
    """Run ``convert_complex_value`` over numeric columns that hold complex values.

    A column is rewritten only when pandas reports its dtype as numeric and
    at least one of its elements is a ``complex`` instance. All other
    columns are left untouched. The DataFrame is modified in place and
    returned.

    NOTE(review): columns that fail the numeric-dtype check are skipped
    entirely, so complex values stored as strings or Python objects in
    such columns are presumably never converted here; confirm this is
    intended.
    """
    def _is_complex(value):
        return isinstance(value, complex)

    for col in df.columns:
        series = df[col]
        # The numeric check short-circuits, so the element scan only runs
        # on numeric columns.
        if pd.api.types.is_numeric_dtype(series) and series.apply(_is_complex).any():
            df[col] = series.apply(convert_complex_value)

    return df
8282
8383
def process_excel_files(excel_files):
    """Combine every sheet of the given Excel workbooks into one CSV file.

    Each sheet is read, transposed, and tagged with a ``file_name`` column
    (the workbook's base name without extension) and a ``sheet_name``
    column. The transposed sheets are concatenated with a fresh integer
    index, passed through ``unify_complex_float_types``, and written to
    ``combined_output.csv`` in the directory of the first workbook.

    Args:
        excel_files: Sequence of paths to Excel workbooks. The first entry
            determines the output directory.
    """
    # Transposed sheets from every workbook, in read order.
    all_data = []

    for file_path in excel_files:
        # Workbook name without directory or extension.
        file_name = os.path.splitext(os.path.basename(file_path))[0]

        # Open the workbook once and close it deterministically. Sheets are
        # read through the open handle instead of re-opening the file by
        # path for every sheet, and the handle no longer leaks.
        with pd.ExcelFile(file_path) as excel:
            for sheet_name in excel.sheet_names:
                df = pd.read_excel(excel, sheet_name=sheet_name)

                # Rows become columns so each original column is one record.
                df_transposed = df.transpose()

                # Record where each transposed row came from.
                df_transposed['file_name'] = file_name
                df_transposed['sheet_name'] = sheet_name

                all_data.append(df_transposed)

    # Stack all sheets, discarding the per-sheet index labels.
    final_df = pd.concat(all_data, ignore_index=True)

    # Collapse effectively-real complex values in numeric columns.
    final_df = unify_complex_float_types(final_df)

    # Write next to the first input workbook; the BOM keeps Excel happy
    # when it opens the CSV.
    output_path = os.path.join(os.path.dirname(excel_files[0]), 'combined_output.csv')
    final_df.to_csv(output_path, encoding='utf-8-sig', index=True)
    # NOTE(review): statements following the CSV write lie outside the
    # reviewed span and are intentionally not reproduced here.
119119
@@ -128,8 +128,10 @@ def process_excel_files(excel_files):
128128 for sheet_name in excel .sheet_names :
129129 df = pd .read_excel (file_path , sheet_name = sheet_name , index_col = 0 )
130130 df = df [~ df .index .duplicated (keep = 'last' )]
131- # df=unify_complex_float_types_row(df) #TODO 导致一些复数失真,但因为比较的时候只用实部,问题不大
132- df = unify_complex_float_types_cell (df ) #TODO 导致一些复数失真,但因为比较的时候只用实部,问题不大
131+ # df=unify_complex_float_types_row(df) #Some complex numbers may lose precision, but it's not a big issue since only real parts are used for comparison
132+ df = unify_complex_float_types_cell (
133+ df
134+ ) #Some complex numbers may lose precision, but it's not a big issue since only real parts are used for comparison
133135 if os .path .exists (SIMILARITYDIR / f"data/new_sim/{ tissue } _similarity.xlsx" ):
134136 mode = 'a'
135137 if_sheet_exists = "replace"
@@ -141,4 +143,4 @@ def process_excel_files(excel_files):
141143 df .to_excel (writer , sheet_name = sheet_name )
142144 excel_files = [SIMILARITYDIR / f"data/new_sim/{ tissue } _similarity.xlsx" for tissue in tissues ]
143145 output_file = process_excel_files (excel_files )
144- print (f"已将合并后的数据保存到 : { output_file } " )
146+ print (f"Combined data has been saved to : { output_file } " )
0 commit comments