Skip to content

Commit 21a3baa

Browse files
authored
Merge pull request #18 from gridfm/pm
Pm
2 parents a122d33 + e1b5326 commit 21a3baa

File tree

96 files changed

+15959
-2292
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

96 files changed

+15959
-2292
lines changed

.coverage

52 KB
Binary file not shown.

.github/workflows/ci-build.yaml

Lines changed: 73 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -6,51 +6,106 @@ on:
66
pull_request:
77
branches:
88
- main
9+
910
jobs:
1011
pre-commit-run:
1112
runs-on: ubuntu-latest
1213
steps:
1314
- uses: actions/checkout@v4
14-
- name: Set up Python
15+
16+
- name: Set up Python 3.12
1517
uses: actions/setup-python@v4
1618
with:
17-
python-version: '3.10'
19+
python-version: '3.12'
20+
21+
- name: Cache pip
22+
uses: actions/cache@v3
23+
with:
24+
path: ~/.cache/pip
25+
key: pip-${{ runner.os }}-${{ hashFiles('**/requirements*.txt') }}
26+
restore-keys: |
27+
pip-${{ runner.os }}-
28+
1829
- name: Install dependencies
1930
run: pip install -e ".[dev]"
31+
2032
- name: Run pre-commit
21-
run: pre-commit run --verbose --all-files
33+
run: pre-commit run --verbose --all-files
2234

2335
security-test:
24-
runs-on: ubuntu-latest
25-
container: python:3.10-slim
36+
runs-on: ubuntu-latest
37+
container: python:3.12-slim
38+
steps:
39+
- uses: actions/checkout@v4
2640

27-
steps:
28-
- name: Checkout
29-
uses: actions/checkout@v4
41+
- name: Cache pip
42+
uses: actions/cache@v3
43+
with:
44+
path: ~/.cache/pip
45+
key: pip-${{ runner.os }}-${{ hashFiles('**/requirements*.txt') }}
46+
restore-keys: |
47+
pip-${{ runner.os }}-
3048
3149
- name: Install dependencies
3250
run: |
3351
python -m pip install --upgrade pip wheel
3452
pip install -e ".[dev]"
3553
3654
- name: Security Checks
37-
run: |
38-
bandit --severity-level high .
55+
run: bandit --severity-level high .
3956

4057
pytests:
41-
runs-on: ubuntu-latest
42-
container: python:3.10-slim
58+
runs-on: ubuntu-latest
59+
steps:
60+
- uses: actions/checkout@v4
4361

44-
steps:
45-
- name: Checkout
46-
uses: actions/checkout@v4
62+
- name: Cache Julia packages
63+
uses: actions/cache@v3
64+
with:
65+
path: |
66+
~/.julia
67+
key: julia-packages-${{ runner.os }}-${{ hashFiles('**/Project.toml') }}
68+
restore-keys: |
69+
julia-packages-${{ runner.os }}-
4770
48-
- name: Install dependencies
71+
- name: Install Julia 1.12
72+
uses: julia-actions/setup-julia@v1
73+
with:
74+
version: '1.12'
75+
76+
- name: Set up Python 3.12
77+
uses: actions/setup-python@v4
78+
with:
79+
python-version: '3.12'
80+
81+
- name: Cache pip
82+
uses: actions/cache@v3
83+
with:
84+
path: ~/.cache/pip
85+
key: pip-${{ runner.os }}-${{ hashFiles('**/requirements*.txt') }}
86+
restore-keys: |
87+
pip-${{ runner.os }}-
88+
89+
- name: Create virtualenv
90+
run: |
91+
python -m venv .venv
92+
source .venv/bin/activate
93+
94+
- name: Install Python dependencies
4995
run: |
96+
source .venv/bin/activate
5097
python -m pip install --upgrade pip wheel
5198
pip install -e ".[test]"
5299
100+
- name: Run Julia setup (PowerModels)
101+
env:
102+
JULIA_PKG_SERVER: ""
103+
run: |
104+
source .venv/bin/activate
105+
gridfm_datakit setup_pm
106+
53107
- name: Unit tests
54108
run: |
55-
pytest --cov=. tests/
56-
# testing
109+
source .venv/bin/activate
110+
export SKIP_LARGE_GRIDS=1
111+
pytest --cov=. tests/ -v -s

.gitignore

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,5 +12,11 @@ notebooks/opf_pertubations.ipynb
1212
gridfm_datakit/grids/*.m
1313
notebooks/test_data
1414
tests/test_data
15-
tests/test_data_contingency
1615
build/
16+
baseline_perturbations/
17+
opf_baseline_perturbations/
18+
extract_results.ipynb
19+
opf_data/data
20+
test_data/
21+
scripts/datasets_sampled/
22+
pfdelta/data/

.pre-commit-config.yaml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,18 @@
11
repos:
22
- repo: https://github.com/pre-commit/pre-commit-hooks
3-
rev: v5.0.0
3+
rev: v6.0.0
44
hooks:
55
- id: trailing-whitespace
66
- id: end-of-file-fixer
77
- id: check-yaml
88
- id: debug-statements
99
- repo: https://github.com/astral-sh/ruff-pre-commit
10-
rev: v0.12.0
10+
rev: v0.13.3
1111
hooks:
1212
- id: ruff-check
1313
- id: ruff-format
1414
- repo: https://github.com/PyCQA/flake8
15-
rev: 7.2.0
15+
rev: 7.3.0
1616
hooks:
1717
- id: flake8
1818
args: ["--ignore=E501,W503,E203"]
@@ -21,7 +21,7 @@ repos:
2121
hooks:
2222
- id: add-trailing-comma
2323
- repo: https://github.com/ibm/detect-secrets
24-
rev: 0.13.1+ibm.62.dss
24+
rev: 0.13.1+ibm.64.dss
2525
hooks:
2626
- id: detect-secrets # pragma: whitelist secret
2727
# Add options for detect-secrets-hook binary. You can run `detect-secrets-hook --help` to list out all possible options.

.secrets.baseline

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
"files": "^.secrets.baseline$",
44
"lines": null
55
},
6-
"generated_at": "2025-04-07T14:02:04Z",
6+
"generated_at": "2025-10-03T08:52:13Z",
77
"plugins_used": [
88
{
99
"name": "AWSKeyDetector"
@@ -112,7 +112,7 @@
112112
}
113113
]
114114
},
115-
"version": "0.13.1+ibm.62.dss",
115+
"version": "0.13.1+ibm.64.dss",
116116
"word_list": {
117117
"file": null,
118118
"hash": null

README.md

Lines changed: 87 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
<b>gridfm-datakit</b>
88
</p>
99

10-
[![Docs](https://img.shields.io/badge/docs-available-brightgreen)](https://gridfm.github.io/gridfm-datakit/)
10+
![Docs](https://img.shields.io/badge/docs-available-brightgreen)
1111
![Coverage](https://img.shields.io/badge/coverage-76%25-yellow)
1212
![Python](https://img.shields.io/badge/python-3.10%20%E2%80%93%203.12-blue)
1313
![License](https://img.shields.io/badge/license-Apache%202.0-blue)
@@ -18,31 +18,38 @@ This library is brought to you by the GridFM team to generate power flow data to
1818
---
1919

2020

21-
22-
## Comparison with other PF datasets/ libraries
23-
24-
| Feature | GraphNeuralSolver [\[1\]](https://doi.org/10.1016/j.epsr.2020.106547) | OPFData [\[2\]](https://arxiv.org/abs/2406.07234) | OPFLearn [\[3\]](https://arxiv.org/abs/2111.01228) | PowerFlowNet [\[4\]](https://arxiv.org/abs/2311.03415) | TypedGNN [\[5\]](https://doi.org/10.1016/j.engappai.2022.105567) | PF△ [\[6\]](https://www.climatechange.ai/papers/iclr2025/67) | **PGLearn** [\[7\]](https://openreview.net/pdf?id=cecIf0CKnH) | **gridfm-datakit** [\[8\]](https://www.cell.com/joule/fulltext/S2542-4351(24)00470-7) |
25-
| ---------------------------------------------------------- | ----------------- | ------- | -------- | ------------- | -------- | --- | ----------------------------- | ---------- |
26-
| Generator Profile |||||||||
27-
| N-1 |||||||||
28-
| > 1000 Buses |||||||||
29-
| N-k, k > 1 |||||||||
30-
| Load Scenarios from Real World Data |||||||||
31-
| Net Param Perturbation |||||||||
32-
| Multi-processing and scalable to very large (1M+) datasets |||||||||
33-
34-
3521
# Installation
3622

37-
1. ⭐ Star the [repository](https://github.com/gridfm/gridfm-datakit) on GitHub to support the project!
23+
1. ⭐ Star the repository on GitHub to support the project!
3824

39-
2. Run:
25+
2. Make sure you have Python 3.10, 3.11, or 3.12 installed. ⚠️ Windows users: Python 3.12 is not supported. Use Python 3.10.11 or 3.11.9.
26+
27+
3. Install gridfm-datakit
4028

4129
```bash
4230
python -m pip install --upgrade pip # Upgrade pip
4331
pip install gridfm-datakit
4432
```
4533

34+
4. Install Julia with PowerModels.jl and Ipopt
35+
36+
```bash
37+
gridfm_datakit setup_pm
38+
```
39+
40+
### For Developers
41+
42+
To install the latest development version from GitHub, follow these steps instead of step 3.
43+
44+
```bash
45+
git clone https://github.com/gridfm/gridfm-datakit.git
46+
cd "gridfm-datakit"
47+
python3 -m venv venv
48+
source venv/bin/activate
49+
python -m pip install --upgrade pip # Upgrade pip to ensure compatibility with pyproject.toml
50+
pip3 install -e '.[test,dev]'
51+
```
52+
4653
# Getting Started
4754

4855
## Option 1: Run data gen using interactive interface
@@ -57,76 +64,110 @@ interactive_interface()
5764

5865
## Option 2: Using the command line interface
5966

67+
### Generate Data
68+
6069
Run the data generation routine from the command line:
6170

6271
```bash
63-
gridfm_datakit path/to/config.yaml
72+
gridfm_datakit generate path/to/config.yaml
6473
```
6574

75+
### Validate Data
76+
77+
Validate generated power flow data for integrity and physical consistency:
78+
79+
```bash
80+
gridfm_datakit validate /path/to/data/ [--n-partitions 100] [--sn-mva 100]
81+
```
82+
83+
### Compute Statistics
84+
85+
Generate statistics plots from generated data:
86+
87+
```bash
88+
gridfm_datakit stats /path/to/data/ [--n-partitions 100] [--sn-mva 100]
89+
```
90+
91+
### Plot Feature Distributions
92+
93+
Create violin plots for bus feature distributions:
94+
95+
```bash
96+
gridfm_datakit plots /path/to/data/ [--n-partitions 100] [--output-dir DIR] [--sn-mva 100]
97+
```
6698

6799
## Configuration Overview
68100

69-
Refer to the sections [Network](network.md), [Load Scenarios](load_scenarios.md), and [Topology perturbations](topology_perturbations.md) for a description of the configuration parameters.
101+
Refer to the sections Network, Load Scenarios, and Topology perturbations of the [documentation](https://gridfm.github.io/gridfm-datakit/) for a description of the configuration parameters.
70102

71103
Sample configuration files are provided in `scripts/config`, e.g. `default.yaml`:
72104

73105
```yaml
74106
network:
75107
name: "case24_ieee_rts" # Name of the power grid network (without extension)
76-
source: "pglib" # Data source for the grid; options: pglib, pandapower, file
108+
source: "pglib" # Data source for the grid; options: pglib, file
109+
# WARNING: the following parameter is only used if source is "file"
77110
network_dir: "scripts/grids" # if using source "file", this is the directory containing the network file (relative to the project root)
78111
79-
80112
load:
81113
generator: "agg_load_profile" # Name of the load generator; options: agg_load_profile, powergraph
82114
agg_profile: "default" # Name of the aggregated load profile
83-
scenarios: 200 # Number of different load scenarios to generate
115+
scenarios: 10000 # Number of different load scenarios to generate
84116
# WARNING: the following parameters are only used if generator is "agg_load_profile"
85117
# if using generator "powergraph", these parameters are ignored
86-
sigma: 0.05 # max local noise
118+
sigma: 0.2 # max local noise
87119
change_reactive_power: true # If true, changes reactive power of loads. If false, keeps the ones from the case file
88120
global_range: 0.4 # Range of the global scaling factor. used to set the lower bound of the scaling factor
89121
max_scaling_factor: 4.0 # Max upper bound of the global scaling factor
90-
step_size: 0.025 # Step size when finding the upper bound of the global scaling factor
91-
start_scaling_factor: 0.8 # Initial value of the global scaling factor
122+
step_size: 0.1 # Step size when finding the upper bound of the global scaling factor
123+
start_scaling_factor: 1.0 # Initial value of the global scaling factor
92124
93125
topology_perturbation:
94126
type: "random" # Type of topology generator; options: n_minus_k, random, none
95127
# WARNING: the following parameters are only used if type is not "none"
96128
k: 1 # Maximum number of components to drop in each perturbation
97-
n_topology_variants: 5 # Number of unique perturbed topologies per scenario
98-
elements: ["line", "trafo", "gen", "sgen"] # elements to perturb options: line, trafo, gen, sgen
129+
n_topology_variants: 20 # Number of unique perturbed topologies per scenario
130+
elements: [branch, gen] # elements to perturb. options: branch, gen
99131
100132
generation_perturbation:
101133
type: "cost_permutation" # Type of generation perturbation; options: cost_permutation, cost_perturbation, none
102-
# WARNING: the following parameters are onlyused if type is "cost_perturbation"
134+
# WARNING: the following parameter is only used if type is "cost_perturbation"
103135
sigma: 1.0 # Size of range used for sampling scaling factor
104136
137+
admittance_perturbation:
138+
type: "random_perturbation" # Type of admittance perturbation; options: random_perturbation, none
139+
# WARNING: the following parameter is only used if type is "random_perturbation"
140+
sigma: 0.2 # Size of range used for sampling scaling factor
141+
105142
settings:
106-
num_processes: 10 # Number of parallel processes to use
143+
num_processes: 16 # Number of parallel processes to use
107144
data_dir: "./data_out" # Directory to save generated data relative to the project root
108-
large_chunk_size: 50 # Number of load scenarios processed before saving
109-
no_stats: false # If true, disables statistical calculations
110-
overwrite: true # If true, overwrites existing files, if false, appends to files (note that bus_params.csv, edge_params.csv, scenarios_{load.generator}.csv and scenarios_{load.generator}.html will still be overwritten)
111-
mode: "pf" # Mode of the script; options: contingency, pf
145+
large_chunk_size: 1000 # Number of load scenarios processed before saving
146+
overwrite: true # If true, overwrites existing files, if false, appends to files
147+
mode: "pf" # Mode of the script; options: pf, opf. pf: power flow data where one or more operating limits – the inequality constraints defined in OPF, e.g., voltage magnitude or branch limits – may be violated. opf: datapoints for training OPF solvers, with cost-optimal dispatches that satisfy all operating limits (OPF-feasible)
148+
include_dc_res: true # If true, also stores the results of dc power flow (in addition to the results of AC power flow). does not work with mode "opf"
149+
enable_solver_logs: true # If true, write OPF/PF logs to {data_dir}/solver_log; PF fast and DCPF fast do not log.
150+
pf_fast: true # Whether to use fast PF solver by default (compute_ac_pf from PowerModels.jl); if false, uses Ipopt-based PF. Some networks e.g. case10000_goc do not work with pf_fast: true. pf_fast is faster and more accurate than the Ipopt-based PF.
151+
dcpf_fast: true # Whether to use fast DCPF solver by default (compute_dc_pf from PowerModels.jl)
152+
max_iter: 200 # Max iterations for Ipopt-based solvers
112153
```
113154

114155
<br>
115156

116157
## Output Files
117158

118-
The data generation process produces several output files in the specified data directory:
159+
The data generation process writes the following artifacts under:
160+
`{settings.data_dir}/{network.name}/raw`
119161

120162
- **tqdm.log**: Progress bar log.
121-
- **error.log**: Log of the errors raised during data generation.
122-
- **args.log**: Copy of the config file used.
123-
- **pf_node.csv**: Data related to the nodes (buses) in the network, such as voltage levels and power injections.
124-
- **pf_edge.csv**: Branch admittance matrix for each pf case.
125-
- **branch_idx_removed.csv**: List of the indices of the branches (lines and transformers) that got removed when perturbing the topologies.
126-
- **edge_params.csv**: Branch admittance matrix and branch rate limits for the unperturbed topology.
127-
- **bus_params.csv**: Parameters for the buses (voltage limits and the base voltage).
128-
- **scenario_{args.load.generator}.csv**: Load element-level load profile obtained after using the load scenario generator.
129-
- **scenario_{args.load.generator}.html**: Plots of the element-level load profile.
130-
- **scenario_{args.load.generator}.log**: If generator is "agg_load_profile", stores the upper and lower bounds for the global scaling factor.
131-
- **stats.csv**: Stats about the generated data.
132-
- **stats_plot.html**: Plots of the stats about the generated data.
163+
- **error.log**: Error messages captured during generation.
164+
- **args.log**: YAML dump of the configuration used for this run.
165+
- **scenarios_{generator}.parquet**: Load scenarios (per-element time series) produced by the selected load generator.
166+
- **scenarios_{generator}.html**: Plot of the generated load scenarios.
167+
- **scenarios_{generator}.log**: Generator-specific notes (e.g., bounds for the global scaling factor when using `agg_load_profile`).
168+
- **n_scenarios.txt**: Metadata file containing the total number of scenarios (used for efficient partition management).
169+
- **bus_data.parquet**: Bus-level features for each processed scenario, partitioned by `scenario_partition` (columns `BUS_COLUMNS` and, if `settings.include_dc_res=True`, also `DC_BUS_COLUMNS`).
170+
- **gen_data.parquet**: Generator features per scenario, partitioned by `scenario_partition` (columns `GEN_COLUMNS`).
171+
- **branch_data.parquet**: Branch features per scenario, partitioned by `scenario_partition` (columns `BRANCH_COLUMNS`).
172+
- **y_bus_data.parquet**: Nonzero Y-bus entries per scenario, partitioned by `scenario_partition` with columns `[scenario, index1, index2, G, B]`.
173+
- **runtime_data.parquet**: Runtime data for each scenario, partitioned by `scenario_partition` (AC and DC solver execution times).

0 commit comments

Comments
 (0)