Skip to content

Commit 01b774a

Browse files
authored
Merge pull request #1 from EES-TUe/initial-implementation
Initial implementation
2 parents ebab8f3 + 84b4272 commit 01b774a

File tree

12 files changed

+615
-31
lines changed

12 files changed

+615
-31
lines changed

.github/pypi-publish.yml

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
name: Publish Python 🐍 distribution 📦 to PyPI
2+
3+
on: push
4+
5+
jobs:
6+
build:
7+
name: Build distribution 📦
8+
runs-on: ubuntu-latest
9+
10+
steps:
11+
- uses: actions/checkout@v4
12+
with:
13+
persist-credentials: false
14+
- name: Set up Python
15+
uses: actions/setup-python@v5
16+
with:
17+
python-version: "3.x"
18+
- name: Install pypa/build
19+
run: >-
20+
python3 -m
21+
pip install
22+
build
23+
--user
24+
- name: Build a binary wheel and a source tarball
25+
run: python3 -m build
26+
- name: Store the distribution packages
27+
uses: actions/upload-artifact@v4
28+
with:
29+
name: python-package-distributions
30+
path: dist/
31+
32+
publish-to-pypi:
33+
name: >-
34+
Publish Python 🐍 distribution 📦 to PyPI
35+
if: startsWith(github.ref, 'refs/tags/') # only publish to PyPI on tag pushes
36+
needs:
37+
- build
38+
runs-on: ubuntu-latest
39+
environment:
40+
name: pypi
41+
url: https://pypi.org/project/electricity-network-file-parser/  # PyPI project page for this package
42+
permissions:
43+
id-token: write # IMPORTANT: mandatory for trusted publishing
44+
45+
steps:
46+
- name: Download all the dists
47+
uses: actions/download-artifact@v4
48+
with:
49+
name: python-package-distributions
50+
path: dist/
51+
- name: Publish distribution 📦 to PyPI
52+
uses: pypa/gh-action-pypi-publish@release/v1
53+
54+
github-release:
55+
name: >-
56+
Sign the Python 🐍 distribution 📦 with Sigstore
57+
and upload them to GitHub Release
58+
needs:
59+
- publish-to-pypi
60+
runs-on: ubuntu-latest
61+
62+
permissions:
63+
contents: write # IMPORTANT: mandatory for making GitHub Releases
64+
id-token: write # IMPORTANT: mandatory for sigstore
65+
66+
steps:
67+
- name: Download all the dists
68+
uses: actions/download-artifact@v4
69+
with:
70+
name: python-package-distributions
71+
path: dist/
72+
- name: Sign the dists with Sigstore
73+
uses: sigstore/gh-action-sigstore-python@v3.0.0
74+
with:
75+
inputs: >-
76+
./dist/*.tar.gz
77+
./dist/*.whl
78+
- name: Create GitHub Release
79+
env:
80+
GITHUB_TOKEN: ${{ github.token }}
81+
run: >-
82+
gh release create
83+
"$GITHUB_REF_NAME"
84+
--repo "$GITHUB_REPOSITORY"
85+
--notes ""
86+
- name: Upload artifact signatures to GitHub Release
87+
env:
88+
GITHUB_TOKEN: ${{ github.token }}
89+
# Upload to GitHub Release using the `gh` CLI.
90+
# `dist/` contains the built packages, and the
91+
# sigstore-produced signatures and certificates.
92+
run: >-
93+
gh release upload
94+
"$GITHUB_REF_NAME" dist/**
95+
--repo "$GITHUB_REPOSITORY"

README.md

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,23 @@
1-
# Python-project-template
2-
A template repository for new python projects
1+
# Network file parser
2+
A Python package for parsing Gaia files. It takes a Gaia file as input and parses the data into pandas DataFrames.
3+
4+
Example usage:
5+
6+
```python
7+
from electricity_network_file_parser.GnfParser import GnfParser
8+
9+
gnf_parser = GnfParser("test.gnf")
10+
gnf_parser.parse_file()
11+
12+
# To access the DataFrame of a single entity type
13+
gnf_parser.data_frames["CABLE"]
14+
15+
# Write all data to a single Excel file; each sheet in the file represents one entity type from the Gaia file
16+
gnf_parser.write_all_data_frames()
17+
```
318

419
## Installation
520

621
Create a new python environment and run the following command:
722

8-
`pip install -e . `
23+
`pip install electricity_network_file_parser`

pyproject.toml

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,20 +3,24 @@ requires = ["hatchling"]
33
build-backend = "hatchling.build"
44

55
[project]
6-
name = "example_package_YOUR_PACKAGE_NAME_HERE"
6+
name = "electricity_network_file_parser"
77
version = "0.0.1"
88
authors = [
9-
{ name="Example Author", email="author@example.com" },
9+
{ name="Leo van Schooten", email="l.g.t.v.schooten@tue.nl" },
1010
]
11-
description = "A small example package"
11+
description = "A Python package for parsing gaia and vision files. "
1212
readme = "README.md"
1313
requires-python = ">=3.8"
1414
classifiers = [
1515
"Programming Language :: Python :: 3",
1616
"License :: OSI Approved :: MIT License",
1717
"Operating System :: OS Independent",
1818
]
19+
dependencies = [
20+
'pandas==2.2.3',
21+
'openpyxl==3.1.5'
22+
]
1923

2024
[project.urls]
21-
Homepage = "https://github.com/pypa/sampleproject"
22-
Issues = "https://github.com/pypa/sampleproject/issues"
25+
Homepage = "https://github.com/EES-TUe/network-file-parser"
26+
Issues = "https://github.com/EES-TUe/network-file-parser/issues"
Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,141 @@
1+
from typing import List
2+
import pandas as pd
3+
4+
from pathlib import Path
5+
6+
from electricity_network_file_parser.dataclasses import PropertyDescription
7+
8+
class FileParser:
    """Base parser for sectioned network files.

    On construction the file is read once and split into named sections by
    ``create_entity_dict``. Subclasses fill ``parse_entities_dict`` with the
    section names to parse, each mapped to the list of property types to
    extract. ``parse_file`` then stores one pandas DataFrame per parsed
    section in ``data_frames``.
    """

    def __init__(self, file_path):
        """Read ``file_path`` and index its sections.

        Args:
            file_path: Path (str or path-like) of the file to parse.
        """
        self.file_path = Path(file_path)
        # Section name -> DataFrame holding the parsed entities of that section.
        self.data_frames: dict[str, pd.DataFrame] = {}

        with open(self.file_path, mode='r') as file:
            lines = file.readlines()

        # Section name -> raw lines found between its start and end markers.
        self.entity_dict = self.create_entity_dict(lines)
        # Section name -> property types to parse; populated by subclasses.
        self.parse_entities_dict = {}

    def is_integer(self, s: str):
        """Return True when ``s`` is an optionally signed run of decimal digits."""
        to_check = s[1:] if s.startswith(('-', '+')) else s
        # isdecimal() matches exactly the digits int() accepts; isdigit() also
        # accepts characters such as superscripts that int() rejects.
        return to_check.isdecimal()

    def parse_value(self, value):
        """Convert the textual ``value`` of an attribute to a Python object.

        ``'True'``/``'False'`` become booleans, signed digit runs become
        ``int``, text wrapped in single quotes becomes ``str`` (quotes
        removed) and anything else is parsed as ``float`` with ``,`` accepted
        as decimal separator.

        Raises:
            ValueError: If ``value`` matches none of the forms above.
        """
        if value == 'True':
            return True
        if value == 'False':
            return False
        if self.is_integer(value):
            return int(value)
        if value.startswith("'") and value.endswith("'"):
            return value[1:-1]
        return float(value.replace(",", "."))

    def extend_dictionary(self, dict_to_extend: dict, dict_extension: dict):
        """Copy every item of ``dict_extension`` into ``dict_to_extend`` in place."""
        dict_to_extend.update(dict_extension)

    def parse_property_line(self, property_line: str):
        """Parse one property line into a ``PropertyDescription``.

        The property type is the text between the first character and the
        first space; the remainder is a sequence of ``name:value`` pairs
        separated by spaces. Spaces inside single-quoted values do not end
        the value.

        Raises:
            ValueError: If the line contains no space, or a value cannot be
                converted by ``parse_value``.
        """
        first_space = property_line.index(" ")
        property_name = property_line[1:first_space]
        property_attributes = property_line[first_space:]

        col_name = ''
        value = ''
        reading_value = False
        reading_string = False
        property_dict = {}
        for char in property_attributes:
            if char == ':' and not reading_value:
                # First colon after a name switches from name to value.
                reading_value = True
            elif not reading_value:
                col_name += char
            elif char == ' ' and not reading_string and value != '':
                # An unquoted space terminates the current value.
                property_dict[col_name.strip()] = self.parse_value(value)
                reading_value = False
                value = ''
                col_name = ''
            else:
                if char == "'":
                    # Quotes toggle string mode and stay part of the raw
                    # value so parse_value can recognise it as a string.
                    reading_string = not reading_string
                value += char

        # Store the last pair only when a value is pending; a trailing space
        # or a dangling name would otherwise make parse_value fail on ''.
        if value.strip():
            property_dict[col_name.strip()] = self.parse_value(value)
        return PropertyDescription(property_name, property_dict)

    def parse_entities(self, lines: List[str], property_attributes_to_parse: List[str]):
        """Build a DataFrame with one row per entity found in ``lines``.

        Attributes of all requested property types belonging to the same
        entity are merged into a single row. An entity is closed when all
        requested property types have been collected, or when a second
        ``General`` property is encountered.

        Args:
            lines: Raw lines of one section.
            property_attributes_to_parse: Property types to extract.

        Returns:
            DataFrame built from the collected attribute dictionaries.
        """
        parsed_property_types = []
        data_instance = {}
        data_instances = []
        for line in lines:
            line_stripped = line.strip()
            property_name = ""
            if " " in line_stripped:
                property_name = line_stripped[1:line_stripped.index(" ")]

            started_new_entity = property_name == "General" and property_name in parsed_property_types
            all_property_types_parsed = len(parsed_property_types) == len(property_attributes_to_parse)
            if started_new_entity or all_property_types_parsed:
                if not all_property_types_parsed:
                    print(f"Not all property types are present for entity {line_stripped}")
                data_instances.append(data_instance)
                data_instance = {}
                parsed_property_types = []

            if property_name in property_attributes_to_parse:
                description = self.parse_property_line(line_stripped)
                self.extend_dictionary(data_instance, description.property_attributes)
                parsed_property_types.append(description.property_type)

        # Flush the entity that was still being collected at the end.
        if data_instance:
            data_instances.append(data_instance)

        return pd.DataFrame(data_instances)

    def create_entity_dict(self, lines):
        """Split ``lines`` into sections keyed by section name.

        Every line that (stripped) starts with ``[`` and ends with ``]`` is a
        marker. Markers alternate between section start and section end; the
        section name is the text inside the brackets of the start marker. A
        final section without an end marker runs to the end of ``lines``.

        Returns:
            dict mapping section name to the lines between its markers.
        """
        marker_indices = [
            i for i, line in enumerate(lines)
            if line.strip().startswith("[") and line.strip().endswith("]")
        ]

        entity_dict = {}
        for pos in range(0, len(marker_indices), 2):
            start = marker_indices[pos]
            # An unterminated last section extends to the end of the file
            # instead of raising IndexError.
            end = marker_indices[pos + 1] if pos + 1 < len(marker_indices) else len(lines)
            entity_name = lines[start].strip()[1:-1]
            entity_dict[entity_name] = lines[start + 1:end]
        return entity_dict

    def group_data_frame_by_columns(self, df: pd.DataFrame, columns_to_group_by: List[str]) -> pd.DataFrame:
        """Count the rows of ``df`` per distinct combination of the given columns.

        Missing values in the grouping columns are replaced by ``-1`` in
        ``df`` itself (in place) so that they form their own group.

        Returns:
            DataFrame with the grouping columns plus a ``count`` column.
        """
        for col in columns_to_group_by:
            # Element-wise map avoids building a Series for every row.
            df[col] = df[col].map(lambda v: -1 if pd.isna(v) else v)
        return df.groupby(columns_to_group_by).size().reset_index().rename(columns={0: 'count'})

    def get_records_containing_field_values(self, df: pd.DataFrame, fields: dict) -> pd.DataFrame:
        """Return the rows of ``df`` whose columns equal all values in ``fields``.

        The filter is built as a ``DataFrame.query`` expression, so keys must
        be valid column identifiers and values must be literals that are
        valid inside such an expression (e.g. numbers).
        """
        if not fields:
            # No constraints: every row matches (query('') would raise).
            return df.copy()
        query = " and ".join(f"{key} == {value}" for key, value in fields.items())
        return df.query(query)

    def parse_cable_types(self, cables_df: pd.DataFrame) -> pd.DataFrame:
        """Hook for subclasses to derive a cable-type table; the base returns None."""
        return None

    def parse_file(self):
        """Parse every configured section present in the file into ``data_frames``.

        Results are appended to any DataFrame already stored for a section.
        When a ``CABLE`` frame exists, ``parse_cable_types`` is called on it
        and a non-None result is stored under ``CABLETYPE``.
        """
        for key, property_types in self.parse_entities_dict.items():
            if key not in self.entity_dict:
                continue
            parsed = self.parse_entities(self.entity_dict[key], property_types)
            if key in self.data_frames:
                parsed = pd.concat([self.data_frames[key], parsed])
            self.data_frames[key] = parsed

        if "CABLE" in self.data_frames:
            cable_types = self.parse_cable_types(self.data_frames["CABLE"])
            # The base hook returns None; storing it would break the writer.
            if cable_types is not None:
                self.data_frames["CABLETYPE"] = cable_types

    def write_all_data_frames(self, file_name: str = "data.xlsx"):
        """Write every DataFrame in ``data_frames`` to its own sheet of ``file_name``."""
        with pd.ExcelWriter(file_name) as writer:
            for name, dataframe in self.data_frames.items():
                dataframe.to_excel(writer, sheet_name=name, index=False)
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
import pandas as pd
2+
from electricity_network_file_parser.FileParser import FileParser
3+
4+
class GnfParser(FileParser):
    """``FileParser`` preconfigured with the sections and property types to read."""

    def __init__(self, file_path):
        """Read ``file_path`` and register which property types to parse per section."""
        super().__init__(file_path)
        # Section name -> property types collected for each entity in it.
        self.parse_entities_dict = {
            "PROFILE": ["General", "ProfileType"],
            "GM TYPE": ["General"],
            "NODE": ["General"],
            "LINK": ["General"],
            "CABLE": ["General", "CablePart", "CableType"],
            "TRANSFORMER": ["General", "VoltageControl", "TransformerType"],
            "SOURCE": ["General"],
            "LOAD": ["General"],
            "HOME": ["General", "ConnectionCableType", "FuseType"],
            "MEASURE FIELD": ["General"],
            "FUSE": ["General"],
        }

    def parse_cable_types(self, cables_df: pd.DataFrame) -> pd.DataFrame:
        """Derive the distinct cable types present in ``cables_df``.

        Cables are grouped on the parameter columns listed below. For each
        distinct combination, the unique ``CableType`` and ``ShortName``
        values of the matching cables are comma-joined into the
        ``Longnames`` and ``Shortnames`` columns of the result.
        """
        parameter_columns = [
            "Unom", "Price", "C", "C0",
            "Inom0", "G1", "Inom1", "G2",
            "Inom2", "G3", "Inom3", "Ik1s",
            "Tr", "TInom", "TIk1s", "Frequency",
            "R_c", "X_c", "R_cc_n", "X_cc_n", "R_cc_o",
            "X_cc_o", "R_e", "X_e", "R_ce", "X_ce", "Inom_e", "Ik1s_e",
            "R_h", "X_h", "R_ch_n", "X_ch_n", "R_ch_o",
            "X_ch_o", "R_hh_n", "X_hh_n", "R_hh_o", "X_hh_o", "R_he", "X_he",
            "Inom_h", "Ik1s_h",
        ]

        grouped = self.group_data_frame_by_columns(cables_df, parameter_columns)

        long_names = []
        short_names = []
        for record in grouped.to_dict('records'):
            # The row count is not a cable parameter, so it is not a filter.
            record.pop("count")
            matching_cables = self.get_records_containing_field_values(cables_df, record)
            type_names = matching_cables["CableType"].unique()
            abbreviations = matching_cables["ShortName"].unique()
            long_names.append(",".join(type_names))
            short_names.append(",".join(abbreviations))

        grouped["Longnames"] = long_names
        grouped["Shortnames"] = short_names
        return grouped
File renamed without changes.
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
from dataclasses import dataclass
2+
3+
@dataclass
class PropertyDescription:
    """Container pairing a property type with its attribute values."""

    # Name identifying the kind of property.
    property_type: str
    # Attribute name -> value for this property.
    property_attributes: dict

src/example_package_YOUR_PACKAGE_NAME_HERE/example_package_YOUR_PACKAGE_NAME_HERE.py

Lines changed: 0 additions & 3 deletions
This file was deleted.

src/main.py

Lines changed: 0 additions & 8 deletions
This file was deleted.

0 commit comments

Comments
 (0)