|
1 | 1 | import io |
| 2 | +import logging |
2 | 3 | import string |
3 | 4 |
|
4 | 5 | import matplotlib.pyplot as plt |
|
9 | 10 |
|
10 | 11 | from .primitives import Table |
11 | 12 |
|
# Configure the root logger when this module is imported: INFO level and a
# "timestamp - level - message" line format. The logging.info / logging.error
# calls in this module go through the root logger and therefore use this setup.
# NOTE(review): this is an import-time side effect on the process-wide root
# logger — confirm the embedding application expects the library to do this.
logging.basicConfig(
    level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
)
| 16 | + |
12 | 17 |
|
def plot_security_report(table: Table) -> io.BytesIO:
    """Render scan results as a polar bar chart and return it as PNG bytes.

    Rows are sorted by ``failureRate`` (highest first) and given short
    identifiers via ``generate_identifiers``. Each row becomes one bar whose
    height is its ``failureRate`` and whose colour is mapped from ``tokens``.
    A side table lists identifier, ``module`` and failure rate for every row.

    Args:
        table: Scan results accepted by ``pd.DataFrame(table)``. The columns
            ``failureRate``, ``tokens`` and ``module`` are read.

    Returns:
        An ``io.BytesIO`` rewound to offset 0 that holds the PNG image.

    Raises:
        TypeError: If ``table`` is not a ``Table``.
        Exception: Any other error raised while building the chart is logged
            and re-raised unchanged.
    """
    fig = None
    try:
        # Data preprocessing
        # NOTE(review): if ``Table`` is a parameterized alias (e.g. list[...])
        # isinstance() itself raises TypeError — confirm against .primitives.
        if not isinstance(table, Table):
            raise TypeError("Input argument must be a Table.")
        logging.info("Data preprocessing started.")

        data = pd.DataFrame(table)

        # Sort by failure rate and reset index
        data = data.sort_values("failureRate", ascending=False).reset_index(drop=True)
        data["identifier"] = generate_identifiers(data)

        # Highest bar; reused for axis limit, grid ticks and radial lines.
        peak_rate = max(data["failureRate"])
        headroom = peak_rate * 1.1

        # Plot setup
        fig, ax = plt.subplots(figsize=(12, 10), subplot_kw={"projection": "polar"})
        fig.set_facecolor("#f0f0f0")
        ax.set_facecolor("#f0f0f0")
        logging.info("Plot setup complete.")

        # Styling parameters
        colors = ["#6C5B7B", "#C06C84", "#F67280", "#F8B195"][::-1]  # Pastel palette
        cmap = LinearSegmentedColormap.from_list("custom", colors, N=256)
        norm = Normalize(vmin=data["tokens"].min(), vmax=data["tokens"].max())

        # Compute angles for the polar plot
        angles = np.linspace(0, 2 * np.pi, len(data), endpoint=False)

        # Plot bars
        ax.bar(
            angles,
            data["failureRate"],
            width=0.5,
            color=[cmap(norm(t)) for t in data["tokens"]],
            alpha=0.8,
            label="Failure Rate %",
        )

        # Customize polar plot: first bar at 12 o'clock, proceeding clockwise
        ax.set_theta_offset(np.pi / 2)
        ax.set_theta_direction(-1)
        ax.set_ylim(0, headroom)  # Add some headroom

        # Add labels (using identifiers)
        ax.set_xticks(angles)
        ax.set_xticklabels(data["identifier"], fontsize=10, fontweight="bold")

        # Add circular grid lines. Labels are derived from the tick positions
        # themselves so both sequences always have the same length, even when
        # the peak failure rate is fractional.
        ax.yaxis.grid(True, color="gray", linestyle=":", alpha=0.5)
        tick_values = np.arange(0, peak_rate, 20)
        ax.set_yticks(tick_values)
        ax.set_yticklabels([f"{int(tick)}%" for tick in tick_values], fontsize=8)

        # Add radial lines
        ax.vlines(
            angles,
            0,
            headroom,
            color="gray",
            linestyle=":",
            alpha=0.5,
        )

        # Color bar for token count
        sm = ScalarMappable(cmap=cmap, norm=norm)
        sm.set_array([])
        cbar = fig.colorbar(sm, ax=ax, orientation="horizontal", pad=0.08, aspect=30)
        cbar.set_label("Token Count (k)", fontsize=10, fontweight="bold")

        # Title and caption
        fig.suptitle(
            "Security Report for Different Modules",
            fontsize=16,
            fontweight="bold",
            y=1.02,
        )
        caption = "Report generated by https://github.com/msoedov/agentic_security"
        fig.text(
            0.5,
            0.02,
            caption,
            fontsize=8,
            ha="center",
            va="bottom",
            alpha=0.7,
            fontweight="bold",
        )

        # Add failure rate values on the bars
        for angle, radius, identifier in zip(
            angles, data["failureRate"], data["identifier"]
        ):
            ax.text(
                angle,
                radius,
                f"{identifier}: {radius:.1f}%",
                ha="center",
                va="bottom",
                rotation=angle * 180 / np.pi - 90,
                rotation_mode="anchor",
                fontsize=7,
                fontweight="bold",
                color="black",
            )

        # Add a table with identifiers and dataset names
        table_data = [["Threat"]] + [
            [f"{identifier}: {module} ({fr:.1f}%)"]
            for identifier, fr, module in zip(
                data["identifier"], data["failureRate"], data["module"]
            )
        ]
        # Separate name so the ``table`` argument is not shadowed.
        legend_table = ax.table(cellText=table_data, loc="right", cellLoc="left")
        legend_table.auto_set_font_size(False)
        legend_table.set_fontsize(8)

        # Adjust table style: compact rows, alternating backgrounds, bold header
        legend_table.scale(1, 0.7)
        for (row, _col), cell in legend_table.get_celld().items():
            cell.set_edgecolor("none")
            cell.set_facecolor("#f0f0f0" if row % 2 == 0 else "#e0e0e0")
            cell.set_alpha(0.8)
            cell.set_text_props(wrap=True)
            if row == 0:
                cell.set_text_props(fontweight="bold")

        # Adjust layout and save; operate on ``fig`` explicitly rather than on
        # pyplot's implicit current figure.
        fig.tight_layout()
        buf = io.BytesIO()
        fig.savefig(buf, format="png", dpi=300, bbox_inches="tight")
        buf.seek(0)
        logging.info("Report successfully generated and saved to buffer.")
        return buf

    except Exception as e:
        logging.error("Error in generating the security report: %s", e)
        raise
    finally:
        # Release the figure on success and on failure so a failed render
        # does not leave it registered with pyplot.
        if fig is not None:
            plt.close(fig)
144 | 156 |
|
145 | 157 |
|
def generate_identifiers(data: pd.DataFrame) -> list[str]:
    """Return one short label per row of ``data``.

    Labels are an uppercase letter followed by a number from 1 to 26:
    ``A1`` .. ``A26``, then ``B1`` .. ``B26``, and so on up to ``Z26``.

    Args:
        data: DataFrame whose row count determines how many labels are made.

    Returns:
        A list of ``len(data)`` labels in row order.

    Raises:
        TypeError: If ``data`` is not a pandas DataFrame.
        ValueError: If ``data`` has no rows.
        OverflowError: If ``data`` has more than 26 * 26 rows.
    """
    alphabet = string.ascii_uppercase
    num_letters = len(alphabet)
    max_identifiers = num_letters * num_letters

    try:
        if not isinstance(data, pd.DataFrame):
            raise TypeError("Input argument must be a pandas DataFrame.")

        data_length = len(data)
        if data_length == 0:
            raise ValueError("DataFrame cannot be empty.")

        if data_length > max_identifiers:
            raise OverflowError(
                f"Cannot generate more than {max_identifiers} unique identifiers."
            )
    except (TypeError, ValueError, OverflowError) as e:
        logging.error("Error in generate_identifiers: %s", e)
        raise

    # The size check above guarantees the letter index stays inside the
    # alphabet, so no per-item bounds check is needed.
    return [
        f"{alphabet[letter_index]}{offset + 1}"
        for letter_index, offset in (
            divmod(i, num_letters) for i in range(data_length)
        )
    ]
0 commit comments