|
| 1 | +from collections import Counter |
1 | 2 | from typing import Any, Dict, Optional |
2 | 3 |
|
3 | 4 | import numpy as np |
@@ -81,14 +82,27 @@ def _calculate_powerlaw_r2(degree_map: Dict[str, int]) -> Optional[float]: |
81 | 82 | return None |
82 | 83 |
|
83 | 84 | try: |
84 | | - # Fit power law: log(y) = a * log(x) + b |
85 | | - log_degrees = np.log(degrees) |
86 | | - sorted_log_degrees = np.sort(log_degrees) |
87 | | - x = np.arange(1, len(sorted_log_degrees) + 1) |
88 | | - log_x = np.log(x) |
89 | | - |
| 85 | + degree_counts = Counter(degrees) |
| 86 | + degree_values = [] |
| 87 | + frequencies = [] |
| 88 | + for deg, freq in sorted(degree_counts.items()): |
| 89 | + if deg > 0 and freq > 0: |
| 90 | + degree_values.append(deg) |
| 91 | + frequencies.append(freq) |
| 92 | + |
| 93 | + if len(degree_values) < 3: |
| 94 | + logger.warning( |
| 95 | + f"Insufficient unique degrees ({len(degree_values)}) for power law fitting. " |
| 96 | + f"Graph may be too uniform." |
| 97 | + ) |
| 98 | + return None |
| 99 | + |
| 100 | + # Fit power law: log(frequency) = a * log(degree) + b |
| 101 | + log_degrees = np.log(degree_values) |
| 102 | + log_frequencies = np.log(frequencies) |
| 103 | + |
90 | 104 | # Linear regression on log-log scale |
91 | | - r_value, *_ = stats.linregress(log_x, sorted_log_degrees) |
| 105 | + r_value, *_ = stats.linregress(log_degrees, log_frequencies) |
92 | 106 | r2 = r_value**2 |
93 | 107 |
|
94 | 108 | return float(r2) |
|
0 commit comments