|
| 1 | +import pandas as pd |
| 2 | +import numpy as np |
| 3 | +import sklearn |
| 4 | +import os |
| 5 | +import copy |
| 6 | +from sklearn.metrics import roc_curve, auc |
| 7 | +from sklearn.metrics import accuracy_score, classification_report |
| 8 | + |
| 9 | + |
| 10 | + |
def manhattan_distance2(array, dataframe):
    """Score every row of ``dataframe`` by its normalised L1 distance to ``array``.

    For each row, the Manhattan (L1) distance to ``array`` is divided by
    ``sum(|array|)``. Rows whose ``'y'`` value is greater than or equal to
    ``array[1]`` receive ``1 + ratio``; all other rows receive ``1 - ratio``.

    Args:
        array: 1-D numpy array with one entry per column of ``dataframe``.
        dataframe: pandas DataFrame of numeric values; it must contain a
            column named ``'y'``.

    Returns:
        A 1-D numpy array holding one score per row, in row order, or
        ``None`` (after printing a message) when the arguments have the
        wrong type or incompatible dimensions.

    Raises:
        KeyError: if ``dataframe`` has no ``'y'`` column.
    """
    # Validate the argument types.
    if not isinstance(array, np.ndarray) or not isinstance(dataframe, pd.DataFrame):
        print("Invalid arguments. Please provide a numpy array and a pandas dataframe.")
        return None

    # The reference vector needs exactly one entry per dataframe column.
    if array.ndim != 1 or array.shape[0] != dataframe.shape[1]:
        print("Dimension mismatch. The array length must equal the number of dataframe columns.")
        return None

    # Work positionally on the underlying values so that repeated index
    # labels cannot turn a single-row lookup into a multi-row frame.
    values = dataframe.to_numpy()
    l1_distance = np.abs(values - array).sum(axis=1)

    # An all-zero ``array`` makes this denominator 0, which yields inf/nan
    # scores rather than an exception.
    ratio = l1_distance / np.sum(np.abs(array))

    # NOTE(review): the threshold is read from position 1 of ``array``, which
    # presumably corresponds to the 'y' column - confirm against the callers.
    at_or_above = dataframe['y'].to_numpy() >= array[1]

    return np.where(at_or_above, 1 + ratio, 1 - ratio)
| 38 | + |
| 39 | + |
def immune_scror_calculate2(input_data_dict, features, refer, return_t='dict'):
    """Attach a 'Defense Immune Score' column computed by ``manhattan_distance2``.

    The input is deep-copied first, so the caller's objects are never modified.

    Args:
        input_data_dict: either a single pandas DataFrame, or a dict mapping
            a group label to a DataFrame (presumably one frame per disease
            group - confirm against the callers).
        features: column labels selected from both ``refer`` and the data
            before they are passed to ``manhattan_distance2``.
        refer: reference object indexed as ``refer[features]``; the result
            must provide ``to_numpy()``.
        return_t: only used for dict input. ``'dict'`` returns one frame per
            group; any other value returns a single concatenated frame.

    Returns:
        * DataFrame input: a copy of the input with the extra
          'Defense Immune Score' column.
        * dict input, ``return_t == 'dict'``: a dict with the same keys whose
          values hold the 'Disease Group' and 'Defense Immune Score' columns.
        * dict input, any other ``return_t``: one DataFrame with those two
          columns, built by concatenating all groups (original row labels
          are kept).

    Raises:
        ValueError: if ``input_data_dict`` is neither a DataFrame nor a dict.
    """
    if not isinstance(input_data_dict, (pd.DataFrame, dict)):
        raise ValueError(
            "input_data_dict must be a pandas DataFrame or a dict of DataFrames, "
            f"got {type(input_data_dict).__name__}"
        )

    # The reference vector is the same for every frame, so build it once.
    reference = refer[features].to_numpy()

    if isinstance(input_data_dict, pd.DataFrame):
        immune_score_df = copy.deepcopy(input_data_dict)
        immune_score_df['Defense Immune Score'] = manhattan_distance2(
            array=reference, dataframe=immune_score_df[features])
        return immune_score_df

    scored_groups = copy.deepcopy(input_data_dict)
    for group, frame in scored_groups.items():
        frame["Disease Group"] = group
        frame['Defense Immune Score'] = manhattan_distance2(
            array=reference, dataframe=frame[features])

    result_columns = ['Disease Group', 'Defense Immune Score']
    if return_t == 'dict':
        return {group: frame[result_columns] for group, frame in scored_groups.items()}

    # The previous code read ``.columns`` from the dict itself, which always
    # raised AttributeError; the per-group frames are concatenated instead.
    if not scored_groups:
        # pd.concat rejects an empty sequence, so return an empty frame.
        return pd.DataFrame(columns=result_columns)
    return pd.concat([frame[result_columns] for frame in scored_groups.values()])
0 commit comments