gridfm
diff --git a/‎.coverage‎
52 KB b/‎.coverage‎
52 KB
diff --git a/‎.github/workflows/ci-build.yaml‎
Lines changed: 73 additions & 18 deletions b/‎.github/workflows/ci-build.yaml‎
Lines changed: 73 additions & 18 deletions
diff --git a/‎.gitignore‎
Lines changed: 7 additions & 1 deletion b/‎.gitignore‎
Lines changed: 7 additions & 1 deletion
diff --git a/‎.pre-commit-config.yaml‎
Lines changed: 4 additions & 4 deletions b/‎.pre-commit-config.yaml‎
Lines changed: 4 additions & 4 deletions
diff --git a/‎.secrets.baseline‎
Lines changed: 2 additions & 2 deletions b/‎.secrets.baseline‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎README.md‎
Lines changed: 87 additions & 46 deletions b/‎README.md‎
Lines changed: 87 additions & 46 deletions
@@ -6,51 +6,106 @@ on:
   pull_request:
     branches:
       - main
+
 jobs:
   pre-commit-run:
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v4
-      - name: Set up Python
+
+      - name: Set up Python 3.12
         uses: actions/setup-python@v4
         with:
-          python-version: '3.10'
+          python-version: '3.12'
+
+      - name: Cache pip
+        uses: actions/cache@v3
+        with:
+          path: ~/.cache/pip
+          key: pip-${{ runner.os }}-${{ hashFiles('**/requirements*.txt') }}
+          restore-keys: |
+            pip-${{ runner.os }}-
+
       - name: Install dependencies
         run: pip install -e ".[dev]"
+
       - name: Run pre-commit
-        run: pre-commit run --verbose  --all-files
+        run: pre-commit run --verbose --all-files
 
   security-test:
-      runs-on: ubuntu-latest
-      container: python:3.10-slim
+    runs-on: ubuntu-latest
+    container: python:3.12-slim
+    steps:
+      - uses: actions/checkout@v4
 
-      steps:
-      - name: Checkout
-        uses: actions/checkout@v4
+      - name: Cache pip
+        uses: actions/cache@v3
+        with:
+          path: ~/.cache/pip
+          key: pip-${{ runner.os }}-${{ hashFiles('**/requirements*.txt') }}
+          restore-keys: |
+            pip-${{ runner.os }}-
 
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip wheel
           pip install -e ".[dev]"
 
       - name: Security Checks
-        run: |
-          bandit --severity-level high .
+        run: bandit --severity-level high .
 
   pytests:
-      runs-on: ubuntu-latest
-      container: python:3.10-slim
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
 
-      steps:
-      - name: Checkout
-        uses: actions/checkout@v4
+      - name: Cache Julia packages
+        uses: actions/cache@v3
+        with:
+          path: |
+            ~/.julia
+          key: julia-packages-${{ runner.os }}-${{ hashFiles('**/Project.toml') }}
+          restore-keys: |
+            julia-packages-${{ runner.os }}-
 
-      - name: Install dependencies
+      - name: Install Julia 1.12
+        uses: julia-actions/setup-julia@v1
+        with:
+          version: '1.12'
+
+      - name: Set up Python 3.12
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.12'
+
+      - name: Cache pip
+        uses: actions/cache@v3
+        with:
+          path: ~/.cache/pip
+          key: pip-${{ runner.os }}-${{ hashFiles('**/requirements*.txt') }}
+          restore-keys: |
+            pip-${{ runner.os }}-
+
+      - name: Create virtualenv
+        run: |
+          python -m venv .venv
+          source .venv/bin/activate
+
+      - name: Install Python dependencies
         run: |
+          source .venv/bin/activate
           python -m pip install --upgrade pip wheel
           pip install -e ".[test]"
 
+      - name: Run Julia setup (PowerModels)
+        env:
+          JULIA_PKG_SERVER: ""
+        run: |
+          source .venv/bin/activate
+          gridfm_datakit setup_pm
+
       - name: Unit tests
         run: |
-          pytest --cov=. tests/
-# testing
+          source .venv/bin/activate
+          export SKIP_LARGE_GRIDS=1
+          pytest --cov=. tests/ -v -s
@@ -12,5 +12,11 @@ notebooks/opf_pertubations.ipynb
 gridfm_datakit/grids/*.m
 notebooks/test_data
 tests/test_data
-tests/test_data_contingency
 build/
+baseline_perturbations/
+opf_baseline_perturbations/
+extract_results.ipynb
+opf_data/data
+test_data/
+scripts/datasets_sampled/
+pfdelta/data/
@@ -1,18 +1,18 @@
 repos:
 -   repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v5.0.0
+    rev: v6.0.0
     hooks:
     -   id: trailing-whitespace
     -   id: end-of-file-fixer
     -   id: check-yaml
     -   id: debug-statements
 - repo: https://github.com/astral-sh/ruff-pre-commit
-  rev: v0.12.0
+  rev: v0.13.3
   hooks:
     - id: ruff-check
     - id: ruff-format
 -   repo: https://github.com/PyCQA/flake8
-    rev: 7.2.0
+    rev: 7.3.0
     hooks:
     -   id: flake8
         args: ["--ignore=E501,W503,E203"]
@@ -21,7 +21,7 @@ repos:
     hooks:
     -   id: add-trailing-comma
 -   repo: https://github.com/ibm/detect-secrets
-    rev: 0.13.1+ibm.62.dss
+    rev: 0.13.1+ibm.64.dss
     hooks:
         - id: detect-secrets # pragma: whitelist secret
         # Add options for detect-secrets-hook binary. You can run `detect-secrets-hook --help` to list out all possible options.
 
@@ -3,7 +3,7 @@
     "files": "^.secrets.baseline$",
     "lines": null
   },
-  "generated_at": "2025-04-07T14:02:04Z",
+  "generated_at": "2025-10-03T08:52:13Z",
   "plugins_used": [
     {
       "name": "AWSKeyDetector"
@@ -112,7 +112,7 @@
       }
     ]
   },
-  "version": "0.13.1+ibm.62.dss",
+  "version": "0.13.1+ibm.64.dss",
   "word_list": {
     "file": null,
     "hash": null
 
@@ -7,7 +7,7 @@
     <b>gridfm-datakit</b>
 </p>
 
-[![Docs](https://img.shields.io/badge/docs-available-brightgreen)](https://gridfm.github.io/gridfm-datakit/)
+![Docs](https://img.shields.io/badge/docs-available-brightgreen)
 ![Coverage](https://img.shields.io/badge/coverage-76%25-yellow)
 ![Python](https://img.shields.io/badge/python-3.10%20%E2%80%93%203.12-blue)
 ![License](https://img.shields.io/badge/license-Apache%202.0-blue)
@@ -18,31 +18,38 @@ This library is brought to you by the GridFM team to generate power flow data to
 ---
 
 
-
-## Comparison with other PF datasets/ libraries
-
-| Feature                                                    | GraphNeuralSolver [\[1\]](https://doi.org/10.1016/j.epsr.2020.106547) | OPFData [\[2\]](https://arxiv.org/abs/2406.07234) | OPFLearn [\[3\]](https://arxiv.org/abs/2111.01228) | PowerFlowNet [\[4\]](https://arxiv.org/abs/2311.03415) | TypedGNN [\[5\]](https://doi.org/10.1016/j.engappai.2022.105567) | PF△ [\[6\]](https://www.climatechange.ai/papers/iclr2025/67) | **PGLearn** [\[7\]](https://openreview.net/pdf?id=cecIf0CKnH) | **gridfm-datakit** [\[8\]](https://www.cell.com/joule/fulltext/S2542-4351(24)00470-7) |
-| ---------------------------------------------------------- | ----------------- | ------- | -------- | ------------- | -------- | --- | ----------------------------- | ---------- |
-| Generator Profile                                          | ✅                | ❌      | ❌       | ✅            | ✅       | ✅  | ❌                            | ✅         |
-| N-1                                                        | ❌                | ✅      | ❌       | ❌            | ✅       | ✅  | ✅                            | ✅         |
-| > 1000 Buses                                               | ❌                | ✅      | ✅       | ❌            | ❌       | ✅  | ✅                            | ✅         |
-| N-k, k > 1                                                 | ❌                | ❌      | ❌       | ❌            | ❌       | ❌  | ❌                            | ✅         |
-| Load Scenarios from Real World Data                        | ❌                | ❌      | ❌       | ❌            | ❌       | ❌  | ❌                            | ✅         |
-| Net Param Perturbation                                     | ✅                | ❌      | ❌       | ✅            | ✅       | ❌  | ❌                            | ✅         |
-| Multi-processing and scalable to very large (1M+) datasets | ❌                | ❌      | ❌       | ❌            | ❌       | ❌  | ✅                            | ✅         |
-
-
 # Installation
 
-1. ⭐ Star the [repository](https://github.com/gridfm/gridfm-datakit) on GitHub to support the project!
+1. ⭐ Star the repository on GitHub to support the project!
 
-2. Run:
+2. Make sure you have Python 3.10, 3.11, or 3.12 installed. ⚠️ Windows users: Python 3.12 is not supported. Use Python 3.10.11 or 3.11.9.
+
+3. Install gridfm-datakit
 
     ```bash
     python -m pip install --upgrade pip  # Upgrade pip
     pip install gridfm-datakit
     ```
 
+4. Install Julia with PowerModels and Ipopt
+
+    ```bash
+    gridfm_datakit setup_pm
+    ```
+
+### For Developers
+
+To install the latest development version from GitHub, follow these steps instead of step 3.
+
+```bash
+git clone https://github.com/gridfm/gridfm-datakit.git
+cd "gridfm-datakit"
+python3 -m venv venv
+source venv/bin/activate
+python -m pip install --upgrade pip  # Upgrade pip to ensure compatibility with pyproject.toml
+pip3 install -e '.[test,dev]'
+```
+
 # Getting Started
 
 ## Option 1: Run data gen using interactive interface
@@ -57,76 +64,110 @@ interactive_interface()
 
 ## Option 2: Using the command line interface
 
+### Generate Data
+
 Run the data generation routine from the command line:
 
 ```bash
-gridfm_datakit path/to/config.yaml
+gridfm-datakit generate path/to/config.yaml
 ```
 
+### Validate Data
+
+Validate generated power flow data for integrity and physical consistency:
+
+```bash
+gridfm-datakit validate /path/to/data/ [--n-partitions 100] [--sn-mva 100]
+```
+
+### Compute Statistics
+
+Generate statistics plots from generated data:
+
+```bash
+gridfm-datakit stats /path/to/data/ [--n-partitions 100] [--sn-mva 100]
+```
+
+### Plot Feature Distributions
+
+Create violin plots for bus feature distributions:
+
+```bash
+gridfm-datakit plots /path/to/data/ [--n-partitions 100] [--output-dir DIR] [--sn-mva 100]
+```
 
 ## Configuration Overview
 
-Refer to the sections [Network](network.md), [Load Scenarios](load_scenarios.md), and [Topology perturbations](topology_perturbations.md) for a description of the configuration parameters.
+Refer to the sections Network, Load Scenarios, and Topology perturbations of the [documentation](https://gridfm.github.io/gridfm-datakit/) for a description of the configuration parameters.
 
 Sample configuration files are provided in `scripts/config`, e.g. `default.yaml`:
 
 ```yaml
 network:
   name: "case24_ieee_rts" # Name of the power grid network (without extension)
-  source: "pglib" # Data source for the grid; options: pglib, pandapower, file
+  source: "pglib" # Data source for the grid; options: pglib, file
+  # WARNING: the following parameter is only used if source is "file"
   network_dir: "scripts/grids" # if using source "file", this is the directory containing the network file (relative to the project root)
 
-
 load:
   generator: "agg_load_profile" # Name of the load generator; options: agg_load_profile, powergraph
   agg_profile: "default" # Name of the aggregated load profile
-  scenarios: 200 # Number of different load scenarios to generate
+  scenarios: 10000 # Number of different load scenarios to generate
   # WARNING: the following parameters are only used if generator is "agg_load_profile"
   # if using generator "powergraph", these parameters are ignored
-  sigma: 0.05 # max local noise
+  sigma: 0.2 # max local noise
   change_reactive_power: true # If true, changes reactive power of loads. If False, keeps the ones from the case file
   global_range: 0.4 # Range of the global scaling factor. used to set the lower bound of the scaling factor
   max_scaling_factor: 4.0 # Max upper bound of the global scaling factor
-  step_size: 0.025 # Step size when finding the upper bound of the global scaling factor
-  start_scaling_factor: 0.8 # Initial value of the global scaling factor
+  step_size: 0.1 # Step size when finding the upper bound of the global scaling factor
+  start_scaling_factor: 1.0 # Initial value of the global scaling factor
 
 topology_perturbation:
   type: "random" # Type of topology generator; options: n_minus_k, random, none
   # WARNING: the following parameters are only used if type is not "none"
   k: 1 # Maximum number of components to drop in each perturbation
-  n_topology_variants: 5 # Number of unique perturbed topologies per scenario
-  elements: ["line", "trafo", "gen", "sgen"] # elements to perturb options: line, trafo, gen, sgen
+  n_topology_variants: 20 # Number of unique perturbed topologies per scenario
+  elements: [branch, gen] # elements to perturb. options: branch, gen
 
 generation_perturbation:
   type: "cost_permutation" # Type of generation perturbation; options: cost_permutation, cost_perturbation, none
-  # WARNING: the following parameters are onlyused if type is "cost_perturbation"
+  # WARNING: the following parameter is only used if type is "cost_perturbation"
   sigma: 1.0 # Size of range used for sampling scaling factor
 
+admittance_perturbation:
+  type: "random_perturbation" # Type of admittance perturbation; options: random_perturbation, none
+  # WARNING: the following parameter is only used if type is "random_perturbation"
+  sigma: 0.2 # Size of range used for sampling scaling factor
+
 settings:
-  num_processes: 10 # Number of parallel processes to use
+  num_processes: 16 # Number of parallel processes to use
   data_dir: "./data_out" # Directory to save generated data relative to the project root
-  large_chunk_size: 50 # Number of load scenarios processed before saving
-  no_stats: false # If true, disables statistical calculations
-  overwrite: true # If true, overwrites existing files, if false, appends to files (note that bus_params.csv, edge_params.csv, scenarios_{load.generator}.csv and scenarios_{load.generator}.html will still be overwritten)
-  mode: "pf" # Mode of the script; options: contingency, pf
+  large_chunk_size: 1000 # Number of load scenarios processed before saving
+  overwrite: true # If true, overwrites existing files, if false, appends to files
+  mode: "pf" # Mode of the script; options: pf, opf. pf: power flow data where one or more operating limits – the inequality constraints defined in OPF, e.g., voltage magnitude or branch limits – may be violated. opf: datapoints for training OPF solvers, with cost-optimal dispatches that satisfy all operating limits (OPF-feasible)
+  include_dc_res: true # If true, also stores the results of DC power flow (in addition to the results of AC power flow). Does not work with mode "opf"
+  enable_solver_logs: true # If true, write OPF/PF logs to {data_dir}/solver_log; PF fast and DCPF fast do not log.
+  pf_fast: true # Whether to use fast PF solver by default (compute_ac_pf from PowerModels.jl); if false, uses Ipopt-based PF. Some networks, e.g. case10000_goc, do not work with pf_fast: true. pf_fast is faster and more accurate than the Ipopt-based PF.
+  dcpf_fast: true # Whether to use fast DCPF solver by default (compute_dc_pf from PowerModels.jl)
+  max_iter: 200 # Max iterations for Ipopt-based solvers
 ```
 
 <br>
 
 ## Output Files
 
-The data generation process produces several output files in the specified data directory:
+The data generation process writes the following artifacts under:
+`{settings.data_dir}/{network.name}/raw`
 
 - **tqdm.log**: Progress bar log.
-- **error.log**: Log of the errors raised during data generation.
-- **args.log**: Copy of the config file used.
-- **pf_node.csv**: Data related to the nodes (buses) in the network, such as voltage levels and power injections.
-- **pf_edge.csv**: Branch admittance matrix for each pf case.
-- **branch_idx_removed.csv**: List of the indices of the branches (lines and transformers) that got removed when perturbing the topologies.
-- **edge_params.csv**: Branch admittance matrix and branch rate limits for the unperturbed topology.
-- **bus_params.csv**: Parameters for the buses (voltage limits and the base voltage).
-- **scenario_{args.load.generator}.csv**: Load element-level load profile obtained after using the load scenario generator.
-- **scenario_{args.load.generator}.html**: Plots of the element-level load profile.
-- **scenario_{args.load.generator}.log**: If generator is "agg_load_profile", stores the upper and lower bounds for the global scaling factor.
-- **stats.csv**: Stats about the generated data.
-- **stats_plot.html**: Plots of the stats about the generated data.
+- **error.log**: Error messages captured during generation.
+- **args.log**: YAML dump of the configuration used for this run.
+- **scenarios_{generator}.parquet**: Load scenarios (per-element time series) produced by the selected load generator.
+- **scenarios_{generator}.html**: Plot of the generated load scenarios.
+- **scenarios_{generator}.log**: Generator-specific notes (e.g., bounds for the global scaling factor when using `agg_load_profile`).
+- **n_scenarios.txt**: Metadata file containing the total number of scenarios (used for efficient partition management).
+- **bus_data.parquet**: Bus-level features for each processed scenario, partitioned by `scenario_partition` (columns `BUS_COLUMNS` and, if `settings.include_dc_res=True`, also `DC_BUS_COLUMNS`).
+- **gen_data.parquet**: Generator features per scenario, partitioned by `scenario_partition` (columns `GEN_COLUMNS`).
+- **branch_data.parquet**: Branch features per scenario, partitioned by `scenario_partition` (columns `BRANCH_COLUMNS`).
+- **y_bus_data.parquet**: Nonzero Y-bus entries per scenario, partitioned by `scenario_partition` with columns `[scenario, index1, index2, G, B]`.
+- **runtime_data.parquet**: Runtime data for each scenario, partitioned by `scenario_partition` (AC and DC solver execution times).
Original file line number	Diff line number	Diff line change
`@@ -3,7 +3,7 @@`
`3`	`3`	`"files": "^.secrets.baseline$",`
`4`	`4`	`"lines": null`
`5`	`5`	`},`
`6`		`- "generated_at": "2025-04-07T14:02:04Z",`
	`6`	`+ "generated_at": "2025-10-03T08:52:13Z",`
`7`	`7`	`"plugins_used": [`
`8`	`8`	`{`
`9`	`9`	`"name": "AWSKeyDetector"`
`@@ -112,7 +112,7 @@`
`112`	`112`	`}`
`113`	`113`	`]`
`114`	`114`	`},`
`115`		`- "version": "0.13.1+ibm.62.dss",`
	`115`	`+ "version": "0.13.1+ibm.64.dss",`
`116`	`116`	`"word_list": {`
`117`	`117`	`"file": null,`
`118`	`118`	`"hash": null`