|
# Keep a handle on the stock ``pd.Series.apply`` before the name ``apply`` is
# registered again below; the wrapper simply delegates to it.
# Original author's note: overloading apply causes problems when printing
# dataframes, so the overload lives here to make nested-call visualization
# work.
old_apply = pd.Series.apply


@pf.register_series_method
def apply(s: pd.Series, func) -> pd.Series:
    """Call the saved ``Series.apply`` with ``func`` and return its result unchanged."""
    return old_apply(s, func)
25 | 27 |
|
| 28 | + |
@pf.register_series_method
def load_images(file_pathes: pd.Series) -> pd.DataFrame:
    """Read every image listed in ``file_pathes`` into a single dataframe.

    Args:
        file_pathes: Series of image file paths; each one is passed to
            ``imread``.

    Returns:
        A dataframe with one row per path and two columns: ``im_name`` (the
        base name of the path) and ``image`` (whatever ``imread`` returned).
        The index is a fresh 0..n-1 range. An empty input yields an empty
        dataframe that still has both columns.
    """
    # Collect all rows first and build the frame once: ``DataFrame.append``
    # was removed in pandas 2.0 and re-copied the whole frame on every call.
    records = [
        {"im_name": os.path.basename(file_path), "image": imread(file_path)}
        for file_path in file_pathes
    ]
    return pd.DataFrame(records, columns=["im_name", "image"])
36 | 41 |
|
| 42 | + |
@pf.register_dataframe_method
def subplot(df: pd.DataFrame, *, image_col, title_col, title):
    """Pass-through pipeline step: returns ``df`` untouched.

    ``image_col``, ``title_col`` and ``title`` are keyword-only and are
    accepted but not used by the current implementation.
    """
    return df
40 | 46 |
|
| 47 | + |
@pf.register_dataframe_method
def binarize_images(df: pd.DataFrame, thresholding_method) -> pd.DataFrame:
    """Add ``gray_leaf`` and ``binarized`` columns to ``df`` in place.

    Args:
        df: Dataframe with an ``image`` column.
        thresholding_method: Callable invoked once per gray image; its return
            value is used as the cut-off for that image.

    Returns:
        The same ``df`` object, where ``gray_leaf`` holds ``rgb2gray(image)``
        and ``binarized`` holds ``gray_leaf < cut-off`` for each row.
    """
    df["gray_leaf"] = df["image"].apply(rgb2gray)
    # Create the column up front so the per-row writes below have a target.
    df["binarized"] = None
    for row_label, gray in df["gray_leaf"].items():
        cutoff = thresholding_method(gray)
        df.at[row_label, "binarized"] = gray < cutoff
    return df
49 | 56 |
|
| 57 | + |
@pf.register_dataframe_method
def morphology(df: pd.DataFrame) -> pd.DataFrame:
    """Add ``closed`` and ``opened`` columns to ``df`` in place and return it.

    ``closed`` is ``area_closing`` applied to each ``binarized`` entry;
    ``opened`` is ``area_opening`` applied to each ``closed`` entry.
    """
    closed = df["binarized"].apply(area_closing)
    df["closed"] = closed
    opened = df["closed"].apply(area_opening)
    df["opened"] = opened
    return df
55 | 63 |
|
| 64 | + |
@pf.register_dataframe_method
def labeling(df):
    """Add ``label_im`` and ``regions`` columns to ``df`` in place and return it.

    ``label_im`` is ``label`` applied to each ``opened`` entry; ``regions`` is
    ``regionprops`` applied to each ``label_im`` entry.
    """
    labeled = df["opened"].apply(label)
    df["label_im"] = labeled
    regions = df["label_im"].apply(regionprops)
    df["regions"] = regions
    return df
61 | 70 |
|
| 71 | + |
@pf.register_dataframe_method
def get_properties_of_each_region(df: pd.DataFrame) -> pd.DataFrame:
    """Build one table of region measurements across all rows of ``df``.

    For every row, ``regionprops_table`` is called with the row's ``label_im``
    and ``gray_leaf`` and the property list below. The per-row tables are
    filtered, tagged with the row's ``im_name`` and concatenated.

    Args:
        df: Dataframe with ``label_im``, ``gray_leaf`` and ``im_name`` columns.

    Returns:
        The concatenated per-row tables (each keeps its own index, so index
        values can repeat). If ``df`` has no rows, an empty dataframe with the
        property columns plus ``im_name`` is returned instead of letting
        ``pd.concat`` raise on an empty list.
    """
    properties = [
        "area",
        "convex_area",
        "bbox_area",
        "major_axis_length",
        "minor_axis_length",
        "perimeter",
        "equivalent_diameter",
        "mean_intensity",
        "solidity",
        "eccentricity",
    ]
    per_image = []
    for row in df.itertuples():
        props = pd.DataFrame(
            regionprops_table(row.label_im, row.gray_leaf, properties=properties)
        )
        # Keep regions with area > 100 and drop the row at index 0.
        # NOTE(review): this discards the first row of every per-image table;
        # confirm that is intended.
        keep = (props.index != 0) & (props["area"] > 100)
        # ``.loc[...].assign`` returns a new frame, so adding the column does
        # not write into a filtered slice (no SettingWithCopyWarning).
        per_image.append(props.loc[keep].assign(im_name=row.im_name))

    if not per_image:
        return pd.DataFrame(columns=[*properties, "im_name"])
    return pd.concat(per_image)
76 | 96 |
|
| 97 | + |
@pf.register_dataframe_method
def apply_feature_engeneering(df: pd.DataFrame) -> pd.DataFrame:
    """Derive ratio features and return an all-float frame plus ``type``.

    Six ratio columns are added to ``df`` itself (the input is modified).
    The result is a new dataframe holding every column except ``type`` cast
    to float, with ``np.inf`` replaced by 0, and ``type`` re-attached as the
    last column.
    """
    # new column -> (numerator column, denominator column)
    ratio_specs = {
        "ratio_length": ("major_axis_length", "minor_axis_length"),
        "perimeter_ratio_major": ("perimeter", "major_axis_length"),
        "perimeter_ratio_minor": ("perimeter", "minor_axis_length"),
        "area_ratio_convex": ("area", "convex_area"),
        "area_ratio_bbox": ("area", "bbox_area"),
        "peri_over_dia": ("perimeter", "equivalent_diameter"),
    }
    for new_col, (numerator, denominator) in ratio_specs.items():
        df[new_col] = df[numerator] / df[denominator]

    numeric_cols = df.drop("type", axis=1).columns
    final_df = df[numeric_cols].astype(float)
    final_df = final_df.replace(np.inf, 0)
    final_df["type"] = df["type"]
    return final_df
89 | 110 |
|
| 111 | + |
# Every JPEG directly under ./dataset is an input image.
file_pathes = pd.Series(glob.glob("dataset/*.jpg"))

# Stage 1: load -> binarize (Otsu threshold) -> morphology -> labeling.
# A ``.subplot(...)`` step can be chained after each call to inspect the
# intermediate images; those steps are currently disabled.
with pyjviz.CB("initial-phase") as cc:
    initial_phase_df = (
        file_pathes.load_images()
        .binarize_images(threshold_otsu)
        .morphology()
        .labeling()
    )

# Stage 2: per-region measurements -> ``type`` taken from the file name (the
# text before the first ".") -> derived ratio features.
with pyjviz.CB("build-features"):
    final_df = (
        initial_phase_df.get_properties_of_each_region()
        .assign(
            type=lambda frame: frame.im_name.apply(
                lambda name: name.split(".")[0]
            )
        )
        .drop(columns="im_name")
        .apply_feature_engeneering()
    )

pyjviz.save_dot(vertical=True)
0 commit comments