-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgenerate_balanced_simple_3k_visualization.py
More file actions
142 lines (117 loc) · 6.17 KB
/
generate_balanced_simple_3k_visualization.py
File metadata and controls
142 lines (117 loc) · 6.17 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
#!/usr/bin/env python3
"""
Balanced Strategy Training Results Visualization (3000 Episodes) - Simple Layout
Same layout as the 1000-episode version, but using the 3000-episode data.
"""
import json
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import os
from pathlib import Path
def _moving_average(values, window):
    """Return the trailing moving average of *values*, or an empty array.

    ``np.convolve(..., mode='valid')`` yields ``max(M, N) - min(M, N) + 1``
    samples, so with fewer than *window* values it would still return a
    non-empty array that no longer lines up with the episode axis. That case
    is short-circuited here so callers can rely on ``.size`` to decide
    whether an average exists.
    """
    if len(values) < window:
        return np.array([])
    return np.convolve(values, np.ones(window) / window, mode='valid')


def _add_corner_note(ax, text, facecolor):
    """Place a bold, boxed annotation in the top-left corner of *ax*."""
    ax.annotate(text,
                xy=(0.02, 0.95), xycoords='axes fraction',
                bbox=dict(boxstyle="round,pad=0.3", facecolor=facecolor, alpha=0.7),
                fontsize=10, fontweight='bold')


def _plot_progress(ax, values, window, raw_color, avg_color, title, ylabel,
                   note_format, note_color):
    """Draw per-episode *values*, their moving average and a 'Final' note on *ax*."""
    averaged = _moving_average(values, window)

    ax.plot(values, alpha=0.3, color=raw_color, linewidth=0.5)
    if averaged.size:
        ax.plot(range(window - 1, len(values)), averaged, color=avg_color,
                linewidth=2, label=f'Moving Average ({window}ep)')
    ax.set_title(title, fontweight='bold')
    ax.set_xlabel('Episodes')
    ax.set_ylabel(ylabel)
    ax.grid(True, alpha=0.3)
    if averaged.size:
        # Only the moving-average line carries a label; skip the legend when
        # it was not drawn so matplotlib does not warn about an empty legend.
        ax.legend()

    # Fall back to the last raw value when the history is shorter than the window.
    final_value = averaged[-1] if averaged.size else values[-1]
    _add_corner_note(ax, f'Final: {final_value:{note_format}}', note_color)


def create_balanced_simple_visualization(
    data_path="outputs_pump_cbm_v047_balanced/training_history.json",
    output_dir="run_3K_episodes/outputs_pump_cbm_v047_balanced",
):
    """Create the simple 4-subplot training visualization and save it as a PNG.

    The figure contains reward progress, cost progress, a histogram of the
    last (up to) 100 episode rewards, and a rolling standard deviation of the
    rewards (window of 100 episodes).

    Args:
        data_path: JSON file providing the ``episode_rewards`` and
            ``episode_costs`` lists.
        output_dir: Directory (created if missing) that receives
            ``balanced_simple_training_results.png``.

    Returns:
        True when the image was written, False when loading or plotting
        failed (the reason is printed).
    """
    try:
        # Load training data
        with open(data_path, 'r', encoding='utf-8') as f:
            data = json.load(f)

        rewards = np.array(data['episode_rewards'])
        costs = np.array(data['episode_costs'])
        if rewards.size == 0 or costs.size == 0:
            # Reported through the generic handler below.
            raise ValueError("training history contains no episodes")

        print(f"[SUCCESS] Loaded Balanced training data: {len(rewards)} episodes")

        # Create figure with 2x2 subplots
        fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(16, 10))
        try:
            fig.suptitle('Balanced Strategy - Training Results (3000 Episodes)',
                         fontsize=16, fontweight='bold')

            window = 50

            # 1. Reward Progress (top-left)
            _plot_progress(ax1, rewards, window,
                           raw_color='lightsalmon', avg_color='darkorange',
                           title='Reward Progress', ylabel='Reward',
                           note_format='.1f', note_color='moccasin')

            # 2. Cost Progress (top-right)
            _plot_progress(ax2, costs, window,
                           raw_color='lightblue', avg_color='steelblue',
                           title='Cost Progress', ylabel='Total Cost',
                           note_format='.0f', note_color='lightblue')

            # 3. Final 100 Episodes - Reward Distribution (bottom-left)
            final_100_rewards = rewards[-100:]
            mean_reward = np.mean(final_100_rewards)
            std_reward = np.std(final_100_rewards)

            ax3.hist(final_100_rewards, bins=15, alpha=0.7, color='lightsalmon',
                     edgecolor='darkorange', linewidth=1)
            ax3.axvline(mean_reward, color='red', linestyle='--', linewidth=2,
                        label=f'Mean: {mean_reward:.2f}')
            ax3.set_title('Final 100 Episodes - Reward Distribution', fontweight='bold')
            ax3.set_xlabel('Reward')
            ax3.set_ylabel('Frequency')
            ax3.grid(True, alpha=0.3)

            # Add statistics text box
            stats_text = (f'Mean: {mean_reward:.2f}\n'
                          f'+1σ: {mean_reward + std_reward:.2f}\n'
                          f'-1σ: {mean_reward - std_reward:.2f}')
            ax3.text(0.75, 0.75, stats_text, transform=ax3.transAxes,
                     bbox=dict(boxstyle="round,pad=0.3", facecolor='moccasin', alpha=0.8),
                     fontsize=10, verticalalignment='top')

            # 4. Learning Stability (bottom-right)
            # Rolling standard deviation over the trailing 100 episodes; the
            # list stays empty when fewer than 100 episodes are available.
            stability_window = 100
            rolling_std = [
                np.std(rewards[end - stability_window:end])
                for end in range(stability_window, len(rewards) + 1)
            ]

            ax4.plot(range(stability_window - 1, len(rewards)), rolling_std,
                     color='darkorange', linewidth=2)
            ax4.set_title(f'Learning Stability (Moving Std, window={stability_window})',
                          fontweight='bold')
            ax4.set_xlabel('Episodes')
            ax4.set_ylabel('Reward Standard Deviation')
            ax4.grid(True, alpha=0.3)

            # Add final stability annotation
            final_std = rolling_std[-1] if rolling_std else 0
            _add_corner_note(ax4, f'Final Std: {final_std:.1f}', 'moccasin')

            # Adjust layout
            fig.tight_layout()

            # Save visualization
            output_dir = Path(output_dir)
            output_dir.mkdir(parents=True, exist_ok=True)
            output_path = output_dir / "balanced_simple_training_results.png"
            fig.savefig(output_path, dpi=300, bbox_inches='tight', facecolor='white')
        finally:
            # Release the figure even when plotting or saving fails.
            plt.close(fig)

        print(f"[SUCCESS] Simple visualization saved: {output_path}")

        # Performance summary
        print("[PERFORMANCE] Summary:")
        print(f" Final Performance (last 100): {mean_reward:.2f}")
        print(f" Peak Performance: {np.max(rewards):.2f}")
        print(f" Final Stability (Std): {final_std:.1f}")
        return True
    except FileNotFoundError as e:
        print(f"[ERROR] Training data file not found: {e}")
        return False
    except Exception as e:
        # Top-level boundary of the script: report the problem and signal
        # failure through the return value instead of propagating.
        print(f"[ERROR] Error creating visualization: {e}")
        return False
if __name__ == "__main__":
    # Script entry point: announce the run, build the figure, report the outcome.
    print("[INFO] Generating Balanced Simple 3000-Episode Training Results Visualization...")
    success = create_balanced_simple_visualization()
    outcome_message = (
        "[SUCCESS] Simple visualization generation completed successfully!"
        if success
        else "[ERROR] Simple visualization generation failed!"
    )
    print(outcome_message)