Skip to content
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 21 additions & 7 deletions graphgen/models/evaluator/kg/structure_evaluator.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from collections import Counter
from typing import Any, Dict, Optional

import numpy as np
Expand Down Expand Up @@ -81,14 +82,27 @@ def _calculate_powerlaw_r2(degree_map: Dict[str, int]) -> Optional[float]:
return None

try:
# Fit power law: log(y) = a * log(x) + b
log_degrees = np.log(degrees)
sorted_log_degrees = np.sort(log_degrees)
x = np.arange(1, len(sorted_log_degrees) + 1)
log_x = np.log(x)

degree_counts = Counter(degrees)
degree_values = []
frequencies = []
for deg, freq in sorted(degree_counts.items()):
if deg > 0 and freq > 0:
degree_values.append(deg)
frequencies.append(freq)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

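This block for populating `degree_values` and `frequencies` can be made more concise and efficient. The `if deg > 0 and freq > 0:` check is redundant because `degrees` is already filtered for positive values on line 78, and a `Counter` built from an iterable never contains entries with a frequency of zero. You can use the `zip(*...)` idiom for a more Pythonic and performant one-liner to replace this entire block. Note that `zip(*...)` yields tuples rather than lists and that unpacking its result raises `ValueError` when the input is empty, so the one-liner relies on `degrees` being non-empty at this point (which the earlier guard appears to ensure).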

Suggested change
degree_values = []
frequencies = []
for deg, freq in sorted(degree_counts.items()):
if deg > 0 and freq > 0:
degree_values.append(deg)
frequencies.append(freq)
degree_values, frequencies = zip(*sorted(degree_counts.items()))


if len(degree_values) < 3:
logger.warning(
f"Insufficient unique degrees ({len(degree_values)}) for power law fitting. "
f"Graph may be too uniform."
)
return None

# Fit power law: log(frequency) = a * log(degree) + b
log_degrees = np.log(degree_values)
log_frequencies = np.log(frequencies)

# Linear regression on log-log scale
r_value, *_ = stats.linregress(log_x, sorted_log_degrees)
r_value, *_ = stats.linregress(log_degrees, log_frequencies)
r2 = r_value**2

return float(r2)
Expand Down
Loading