Skip to content

Commit f39242d

Browse files
devmkijamaa
authored and committed
fix bug which led to duplicate columns when reading HDF5 data
1 parent 3b72482 commit f39242d

File tree

2 files changed

+39
-70
lines changed

2 files changed

+39
-70
lines changed

source/Dialogs/ImportHDF5Dialog.vb

Lines changed: 23 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616
'along with this program. If not, see <https://www.gnu.org/licenses/>.
1717
'
1818
Imports System.Windows.Forms
19-
Imports System.Text.RegularExpressions
2019

2120
''' <summary>
2221
''' Custom import dialog for HDF5 files with two-panel selection:
@@ -59,24 +58,20 @@ Friend Class ImportHDF5Dialog
5958

6059
'Process each series to extract element names and variable names
6160
For Each sInfo As TimeSeriesInfo In Me.tsFile.TimeSeriesInfos
62-
'Series name format: "DatasetName_ColumnName" (e.g., "T_Dru0161_Q_zu")
63-
'Parse to get: dataset name and column name
64-
Dim lastUnderscore As Integer = sInfo.Name.LastIndexOf("_"c)
65-
If lastUnderscore <= 0 Then Continue For
66-
67-
Dim datasetName As String = sInfo.Name.Substring(0, lastUnderscore)
68-
Dim columnName As String = sInfo.Name.Substring(lastUnderscore + 1)
69-
70-
'Extract base element name from dataset name
71-
Dim baseElement As String = ExtractBaseElementName(datasetName)
72-
Dim measurementType As String = ExtractMeasurementType(datasetName, baseElement)
73-
74-
'Build variable name: measurementType_columnName (e.g., "Q_zu") or just columnName if no measurement type
75-
Dim variableName As String
76-
If measurementType.Length > 0 Then
77-
variableName = $"{measurementType}_{columnName}"
78-
Else
79-
variableName = columnName
61+
'Use the dataset/column maps from GINA_HDF5
62+
Dim datasetName As String = Me.tsFile.SeriesDatasetMap(sInfo.Index)
63+
Dim columnName As String = Me.tsFile.SeriesColumnMap(sInfo.Index)
64+
65+
'Use dataset name as element name
66+
Dim baseElement As String = datasetName
67+
68+
'Use column name as variable name
69+
Dim variableName As String = columnName
70+
71+
'1D datasets have no column name
72+
If variableName = "" Then
73+
variableName = datasetName
74+
baseElement = datasetName
8075
End If
8176

8277
'Add to element series map
@@ -122,44 +117,6 @@ Friend Class ImportHDF5Dialog
122117
End Sub
123118

124119

125-
''' <summary>
126-
''' Extracts the base element name from a dataset name
127-
''' Pattern: Element names like T_Dru0161, T_Sch0821, T_Wfg0014 end with digits
128-
''' Dataset names may have additional suffixes like _Q, _Pges, _AFS
129-
''' </summary>
130-
Private Function ExtractBaseElementName(datasetName As String) As String
131-
'Pattern: Match element names that end with digits (e.g., T_Dru0161)
132-
'The base element is everything up to and including the numeric suffix
133-
Dim match As Match = Regex.Match(datasetName, "^(.+?\d+)")
134-
If match.Success Then
135-
Return match.Groups(1).Value
136-
Else
137-
'If no pattern match, use the full name up to the last underscore
138-
Dim lastUnderscore As Integer = datasetName.LastIndexOf("_"c)
139-
If lastUnderscore > 0 Then
140-
Return datasetName.Substring(0, lastUnderscore)
141-
Else
142-
Return datasetName
143-
End If
144-
End If
145-
End Function
146-
147-
''' <summary>
148-
''' Extracts the measurement type from a dataset name (e.g., "Q" from "T_Dru0161_Q")
149-
''' </summary>
150-
Private Function ExtractMeasurementType(datasetName As String, baseElementName As String) As String
151-
If datasetName.Length > baseElementName.Length AndAlso datasetName.StartsWith(baseElementName) Then
152-
Dim suffix As String = datasetName.Substring(baseElementName.Length)
153-
If suffix.StartsWith("_") Then
154-
Return suffix.Substring(1)
155-
Else
156-
Return suffix
157-
End If
158-
Else
159-
Return ""
160-
End If
161-
End Function
162-
163120
Private Sub Button_SelectAllElements_Click(sender As Object, e As EventArgs) Handles Button_SelectAllElements.Click
164121
For i As Integer = 0 To ListBox_Elements.Items.Count - 1
165122
ListBox_Elements.SetSelected(i, True)
@@ -217,21 +174,17 @@ Friend Class ImportHDF5Dialog
217174

218175
'Select series that match both selected elements AND selected variables
219176
For Each sInfo As TimeSeriesInfo In Me.tsFile.TimeSeriesInfos
220-
'Parse series name
221-
Dim lastUnderscore As Integer = sInfo.Name.LastIndexOf("_"c)
222-
If lastUnderscore <= 0 Then Continue For
223-
224-
Dim datasetName As String = sInfo.Name.Substring(0, lastUnderscore)
225-
Dim columnName As String = sInfo.Name.Substring(lastUnderscore + 1)
177+
'Use the dataset/column maps from GINA_HDF5
178+
Dim datasetName As String = Me.tsFile.SeriesDatasetMap(sInfo.Index)
179+
Dim columnName As String = Me.tsFile.SeriesColumnMap(sInfo.Index)
226180

227-
Dim baseElement As String = ExtractBaseElementName(datasetName)
228-
Dim measurementType As String = ExtractMeasurementType(datasetName, baseElement)
181+
Dim baseElement As String = datasetName
182+
Dim variableName As String = columnName
229183

230-
Dim variableName As String
231-
If measurementType.Length > 0 Then
232-
variableName = $"{measurementType}_{columnName}"
233-
Else
234-
variableName = columnName
184+
'1D datasets have no column name
185+
If variableName = "" Then
186+
variableName = datasetName
187+
baseElement = datasetName
235188
End If
236189

237190
'Check if this series should be selected

source/FileFormats/GINA_HDF5.vb

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,16 @@ Namespace Fileformats
3939
''' </summary>
4040
Private _dataGroupName As String
4141

42+
''' <summary>
43+
''' Maps series index to the HDF5 dataset name (element name)
44+
''' </summary>
45+
Public ReadOnly SeriesDatasetMap As New Dictionary(Of Integer, String)
46+
47+
''' <summary>
48+
''' Maps series index to the column name (variable name)
49+
''' </summary>
50+
Public ReadOnly SeriesColumnMap As New Dictionary(Of Integer, String)
51+
4252
''' <summary>
4353
''' Flag indicating whether to show the import dialog
4454
''' </summary>
@@ -79,6 +89,8 @@ Namespace Fileformats
7989
Dim index As Integer = 0
8090

8191
Me.TimeSeriesInfos.Clear()
92+
Me.SeriesDatasetMap.Clear()
93+
Me.SeriesColumnMap.Clear()
8294

8395
Try
8496
Using h5File As NativeFile = PureHDF.H5File.OpenRead(Me.File)
@@ -157,6 +169,8 @@ Namespace Fileformats
157169
sInfo.Unit = "-"
158170
sInfo.Index = index
159171
Me.TimeSeriesInfos.Add(sInfo)
172+
Me.SeriesDatasetMap.Add(index, datasetName)
173+
Me.SeriesColumnMap.Add(index, "")
160174
index += 1
161175

162176
ElseIf dimensions.Length = 2 Then
@@ -188,6 +202,8 @@ Namespace Fileformats
188202
sInfo.Unit = colUnit
189203
sInfo.Index = index
190204
Me.TimeSeriesInfos.Add(sInfo)
205+
Me.SeriesDatasetMap.Add(index, datasetName)
206+
Me.SeriesColumnMap.Add(index, colName)
191207
index += 1
192208
Next
193209
End If

0 commit comments

Comments
 (0)