Skip to content

Commit 058c9a2

Browse files
committed
update comment
1 parent 34bcfac commit 058c9a2

File tree

1 file changed

+25
-23
lines changed

1 file changed

+25
-23
lines changed

examples/atlas/sc_similarity_examples/similarity/process_tissue_similarity_matrices.py

Lines changed: 25 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -31,89 +31,89 @@ def convert_to_complex(s):
3131

3232

3333
def convert_complex_value(x, imag_tol=1e-10):
    """Normalize a single value that may represent a complex number.

    Strings are parsed with ``complex()`` after removing surrounding
    whitespace and parentheses; a string that does not parse is returned
    unchanged. A complex value whose imaginary part is smaller in magnitude
    than ``imag_tol`` is collapsed to a ``float`` holding its real part.
    Values that are neither ``str`` nor ``complex`` are returned as-is.

    Args:
        x: The value to convert.
        imag_tol: Threshold below which the absolute imaginary part is
            treated as zero. Defaults to ``1e-10``.

    Returns:
        A ``float`` when the (parsed) value has a negligible imaginary part,
        the ``complex`` value otherwise, or ``x`` itself when it is not a
        complex number or a parseable string.
    """
    if isinstance(x, str):
        # Strip whitespace on both sides of the parentheses: a bare
        # strip('()') leaves "(1+2j) " as "1+2j) ", which complex() rejects.
        text = x.strip().strip('()').strip()
        try:
            value = complex(text)
        except ValueError:
            # Not a numeric literal: keep the original string untouched.
            return x
    elif isinstance(x, complex):
        value = x
    else:
        return x

    # If the imaginary part is close to 0, return only the real part.
    if abs(value.imag) < imag_tol:
        return float(value.real)
    return value
5050

5151

5252
def unify_complex_float_types_cell(df):
    """Normalize ``df`` in place, one cell at a time.

    Every cell is read with ``df.at``, passed through
    ``convert_complex_value`` and written back to the same position.
    The same DataFrame object is returned.
    """
    # Column-major traversal: all rows of one column, then the next column.
    cell_labels = ((row, col) for col in df.columns for row in df.index)
    for row, col in cell_labels:
        current = df.at[row, col]
        df.at[row, col] = convert_complex_value(current)
    return df
5858

5959

6060
def unify_complex_float_types_row(df):
    """Normalize ``df`` in place, one row at a time.

    Each row is selected with ``df.loc``, its values are mapped through
    ``convert_complex_value`` and the result is assigned back to that row.
    The same DataFrame object is returned.
    """
    for row_label in df.index:
        converted_row = df.loc[row_label].apply(convert_complex_value)
        df.loc[row_label] = converted_row
    return df
6565

6666

6767
def unify_complex_float_types(df):
    """Normalize ``df`` in place, one column at a time.

    Only columns with a numeric dtype that contain at least one ``complex``
    value are rewritten; their values are mapped through
    ``convert_complex_value``. All other columns are left untouched.
    The same DataFrame object is returned.
    """
    is_numeric = pd.api.types.is_numeric_dtype
    for name in df.columns:
        column = df[name]
        # Non-numeric columns are skipped before any per-value inspection.
        if is_numeric(column) and any(isinstance(v, complex) for v in column):
            df[name] = column.apply(convert_complex_value)
    return df
8282

8383

8484
def process_excel_files(excel_files):
85-
# 存储所有数据的列表
85+
# List to store all data
8686
all_data = []
8787

8888
for file_path in excel_files:
89-
# 获取文件名(不含扩展名)
89+
# Get filename (without extension)
9090
file_name = os.path.splitext(os.path.basename(file_path))[0]
9191

92-
# 读取Excel文件中的所有表
92+
# Read all sheets in Excel file
9393
excel = pd.ExcelFile(file_path)
9494

95-
# 处理每个表
95+
# Process each sheet
9696
for sheet_name in excel.sheet_names:
97-
# 读取数据
97+
# Read data
9898
df = pd.read_excel(file_path, sheet_name=sheet_name)
9999

100-
# 转置数据
100+
# Transpose data
101101
df_transposed = df.transpose()
102102

103-
# 添加文件名和表名列
103+
# Add filename and sheet name columns
104104
df_transposed['file_name'] = file_name
105105
df_transposed['sheet_name'] = sheet_name
106106

107-
# 将数据添加到列表中
107+
# Add data to list
108108
all_data.append(df_transposed)
109109

110-
# 合并所有数据
110+
# Merge all data
111111
final_df = pd.concat(all_data, ignore_index=True)
112112

113-
# 统一数据类型
113+
# Unify data types
114114
final_df = unify_complex_float_types(final_df)
115115

116-
# 保存为CSV
116+
# Save as CSV
117117
output_path = os.path.join(os.path.dirname(excel_files[0]), 'combined_output.csv')
118118
final_df.to_csv(output_path, encoding='utf-8-sig', index=True)
119119

@@ -128,8 +128,10 @@ def process_excel_files(excel_files):
128128
for sheet_name in excel.sheet_names:
129129
df = pd.read_excel(file_path, sheet_name=sheet_name, index_col=0)
130130
df = df[~df.index.duplicated(keep='last')]
131-
# df=unify_complex_float_types_row(df) #TODO 导致一些复数失真,但因为比较的时候只用实部,问题不大
132-
df = unify_complex_float_types_cell(df) #TODO 导致一些复数失真,但因为比较的时候只用实部,问题不大
131+
# df=unify_complex_float_types_row(df) #Some complex numbers may lose precision, but it's not a big issue since only real parts are used for comparison
132+
df = unify_complex_float_types_cell(
133+
df
134+
) #Some complex numbers may lose precision, but it's not a big issue since only real parts are used for comparison
133135
if os.path.exists(SIMILARITYDIR / f"data/new_sim/{tissue}_similarity.xlsx"):
134136
mode = 'a'
135137
if_sheet_exists = "replace"
@@ -141,4 +143,4 @@ def process_excel_files(excel_files):
141143
df.to_excel(writer, sheet_name=sheet_name)
142144
excel_files = [SIMILARITYDIR / f"data/new_sim/{tissue}_similarity.xlsx" for tissue in tissues]
143145
output_file = process_excel_files(excel_files)
144-
print(f"已将合并后的数据保存到: {output_file}")
146+
print(f"Combined data has been saved to: {output_file}")

0 commit comments

Comments
 (0)