Parse file based on line indexes

PHamacher · PHamacher · commit 0fc5fb371c60 · 2022-02-03T11:20:07.000-03:00
diff --git a/src/pwf2dict.jl b/src/pwf2dict.jl
@@ -406,12 +406,16 @@ const _pwf_defaults = Dict("DBAR" => _default_dbar, "DLIN" => _default_dlin, "DC
 
 const title_identifier = "TITU"
 const end_section_identifier = "99999"
+const commented_line_identifier = '(' # a line starting with this character is treated as a comment and skipped
 
-function _remove_titles_from_file_lines(file_lines::Vector{String}, section_titles_idx::Vector{Int64})
-    remove_titles_idx = vcat(section_titles_idx, section_titles_idx .+ 1)
-    file_lines_without_titles_idx = setdiff(1:length(file_lines), remove_titles_idx)
-    file_lines = file_lines[file_lines_without_titles_idx]
-    return file_lines
+function _is_valid_line(line_number::Int64, file_lines::Vector{String})
+    # True when `file_lines[line_number]` carries data: it is not empty, not a comment,
+    # not the title marker itself, and not the line right after a title marker.
+    current = file_lines[line_number]
+    skippable = isempty(current) || startswith(current, commented_line_identifier) || current == title_identifier
+    # The first line has no predecessor, so only later lines can follow a title marker.
+    after_title = line_number != 1 && file_lines[line_number - 1] == title_identifier
+    return !skippable && !after_title
 end
 
 """
@@ -422,43 +426,37 @@ element corresponds to a section, divided by the delimiter 99999.
 """
 function _split_sections(io::IO)
     file_lines = readlines(io)
-    filter!(x -> x != "" && x[1] != '(', file_lines) # Ignore commented and empty lines
     file_lines = replace.(file_lines, repeat([Char(65533) => ' '], length(file_lines)))
-    sections = Vector{String}[]
+    sections = Dict{String, Vector{Int64}}() # section name => indices of its data lines in file_lines
 
     section_titles_idx = findall(line -> line == title_identifier, file_lines)
     if !isempty(section_titles_idx)
-        last_section_title_idx = section_titles_idx[end]:section_titles_idx[end] + 1
-        push!(sections, file_lines[last_section_title_idx])
+        sections[title_identifier] = [section_titles_idx[end] + 1] # only the last title is kept; its text is the next line
     end
 
-    file_lines = _remove_titles_from_file_lines(
-        file_lines, section_titles_idx
-    )
-
     section_delim = vcat(
         0, 
         findall(x -> x == end_section_identifier, file_lines)
     )
 
     num_sections = length(section_delim) - 1
+    num_lines = length(file_lines)
 
     for i in 1:num_sections
-        section_begin_idx = section_delim[i] + 1
+        section_name_idx = findnext(idx -> _is_valid_line(idx, file_lines), 1:num_lines, section_delim[i] + 1)
+        (section_name_idx === nothing || section_name_idx >= section_delim[i + 1]) && continue # no header before the delimiter
+        section_name = file_lines[section_name_idx]
+        section_begin_idx = section_name_idx + 1
         section_end_idx   = section_delim[i + 1] - 1
 
-        # Account for multiple sections in the same pwf
-        section_i = findall(x -> x[1] == file_lines[section_begin_idx], sections)
-        @assert length(section_i) < 2
-        if length(section_i) == 0
-            push!(sections, file_lines[section_begin_idx:section_end_idx])
-        else
-            section_i = section_i[1]
-            sections[section_i] = vcat(sections[section_i], file_lines[section_begin_idx + 1:section_end_idx])
-        end
+        section_range = collect(section_begin_idx:section_end_idx)
+        filter!(idx -> _is_valid_line(idx, file_lines), section_range) # drop blank, comment and title lines
+
+        current = get(sections, section_name, Int64[]) # the same section may occur more than once; merge them
+        sections[section_name] = vcat(current, section_range)
     end
 
-    return sections
+    return file_lines, sections
 end
 
 function _handle_implicit_decimal_point!(
@@ -475,8 +473,9 @@ end
 Internal function. Parses a single line of data elements from a PWF file
 and saves it into `data::Dict`.
 """
-function _parse_line_element!(data::Dict{String, Any}, line::String, section::AbstractString)
-
+function _parse_line_element!(data::Dict{String, Any}, line_number::Int64, section::AbstractString, file_lines::Vector{String})
+    line = file_lines[line_number] # the index is kept so a parsing error can report the offending line
+
     line_length = _pwf_dtypes[section][end][3][end]
     if length(line) < line_length
         extra_characters_needed = line_length - length(line)
@@ -498,7 +497,8 @@ function _parse_line_element!(data::Dict{String, Any}, line::String, section::Ab
             end
         catch message
             if !_needs_default(element)
-                throw(Memento.error(_LOGGER, "Parsing error at section $section: $field should be of type $dtype, received $element"))
+                throw(Memento.error(_LOGGER, "Parsing error in line $line_number at section $section: " *
+                                    "$field should be of type $dtype, received $element"))
             end
             data[field] = element
         end
@@ -507,11 +507,12 @@ function _parse_line_element!(data::Dict{String, Any}, line::String, section::Ab
 
 end
 
-function _parse_line_element!(data::Dict{String, Any}, lines::Vector{String}, section::AbstractString)
+function _parse_line_element!(data::Dict{String, Any}, lines_idx::Vector{Int64}, section::AbstractString, file_lines::Vector{String})
 
     mn_keys, mn_values, mn_type = _mnemonic_pairs[section]
 
-    for line in lines
+    for line_number in lines_idx # entries index into file_lines
+        line = file_lines[line_number] # the index is kept so a parsing error can report the offending line
         for i in 1:length(mn_keys)
             k, v = mn_keys[i], mn_values[i]
             if v[end] <= length(line)
@@ -521,8 +522,9 @@ function _parse_line_element!(data::Dict{String, Any}, lines::Vector{String}, se
                         data[line[k]] = parse(mn_type, line[v])
                     catch message
                         if !_needs_default(line[v])
-                            throw(Memento.error(_LOGGER, "Parsing error at section $section: $field should be of type $dtype, received $element"))
-                        end
+                            throw(Memento.error(_LOGGER, "Parsing error in line $line_number at section $section: " *
+                                                "$(line[k]) should be of type $mn_type, received $(line[v])"))
+                        end
                         !_needs_default(line[k]) ? data[line[k]] = line[v] : nothing
                     end
                 else
@@ -539,42 +541,43 @@ end
 Internal function. Parses a section containing a system component.
 Returns a Vector of Dict, where each entry corresponds to a single element.
 """
-function _parse_section_element!(data::Dict{String, Any}, section_lines::Vector{String}, section::AbstractString, idx::Int64=1)
+function _parse_section_element!(data::Dict{String, Any}, section_lines_idx::Vector{Int64}, section::AbstractString, file_lines::Vector{String}, idx::Int64 = 1)
 
     if section == "DBAR"
-        for line in section_lines[2:end]
+        for line_number in section_lines_idx # entries index into file_lines; DBAR elements are keyed by their NUMBER field
 
             line_data = Dict{String, Any}()
-            _parse_line_element!(line_data, line, section)
+            _parse_line_element!(line_data, line_number, section, file_lines)
 
             bus_i = line_data["NUMBER"]
             data["$bus_i"] = line_data
         end
 
     else
-        for line in section_lines[2:end]
+        for line_number in section_lines_idx # other sections are keyed by a running counter that starts at idx
 
             line_data = Dict{String, Any}()
-            _parse_line_element!(line_data, line, section)
+            _parse_line_element!(line_data, line_number, section, file_lines)
 
             data["$idx"] = line_data            
             idx += 1
         end
     end
 end
 
-function _parse_divided_section!(data::Dict{String, Any}, section_lines::Vector{String}, section::String)
+function _parse_divided_section!(data::Dict{String, Any}, section_lines_idx::Vector{Int64}, section::String, file_lines::Vector{String})
+    section_lines = file_lines[section_lines_idx]
 
     separator = _divided_sections[section]["separator"]
-    sub_titles_idx = vcat(1, findall(x -> x == separator, section_lines))
+    sub_titles_idx = vcat(0, findall(x -> x == separator, section_lines))
     for (i, idx) in enumerate(sub_titles_idx)
 
         if idx != sub_titles_idx[end]
             next_idx = sub_titles_idx[i + 1]
-            _parse_section_element!(data, section_lines[idx:idx + 1], _divided_sections[section]["first name"], i)
+            _parse_section_element!(data, [section_lines_idx[idx + 1]], _divided_sections[section]["first name"], file_lines, i)
 
             rc = Dict{String, Any}()
-            _parse_section_element!(rc, section_lines[idx + 1:next_idx - 1], _divided_sections[section]["second name"], i)
+            _parse_section_element!(rc, section_lines_idx[idx + 2:next_idx - 1], _divided_sections[section]["second name"], file_lines, i)
 
             group = _divided_sections[section]["subgroup"]
             data["$i"][group] = rc
@@ -589,21 +592,21 @@ end
 Internal function. Receives an array of lines corresponding to a PWF section,
 transforms it into a Dict and saves it into `data::Dict`.
 """
-function _parse_section!(data::Dict{String, Any}, section_lines::Vector{String})
-    section = section_lines[1]
+function _parse_section!(data::Dict{String, Any}, section::String, section_lines_idx::Vector{Int64}, file_lines::Vector{String})
     section_data = Dict{String, Any}()
 
     if section == title_identifier
-        section_data = section_lines[end]
+        @assert length(section_lines_idx) == 1
+        section_data = file_lines[section_lines_idx[1]]
 
     elseif section in keys(_mnemonic_pairs)
-        _parse_line_element!(section_data, section_lines[2:end], section)
+        _parse_line_element!(section_data, section_lines_idx, section, file_lines)
 
     elseif section in keys(_pwf_dtypes)
-        _parse_section_element!(section_data, section_lines, section)
+        _parse_section_element!(section_data, section_lines_idx, section, file_lines)
 
     elseif section in keys(_divided_sections)
-        _parse_divided_section!(section_data, section_lines, section)
+        _parse_divided_section!(section_data, section_lines_idx, section, file_lines)
 
     else
         Memento.warn(_LOGGER, "Currently there is no support for $section parsing")
@@ -820,11 +823,11 @@ Internal function. Receives a pwf file as an IOStream and parses into a Dict.
 """
 function _parse_pwf_data(data_io::IO)
 
-    sections = _split_sections(data_io)
+    file_lines, sections = _split_sections(data_io)
     pwf_data = Dict{String, Any}()
     pwf_data["name"] = match(r"^\<file\s[\/\\]*(?:.*[\/\\])*(.*)\.pwf\>$", lowercase(data_io.name)).captures[1]
-    for section in sections
-        _parse_section!(pwf_data, section)
+    for (section_name, section) in sections
+        _parse_section!(pwf_data, section_name, section, file_lines)
     end
     _populate_defaults!(pwf_data)
     
diff --git a/test/test_pwf.jl b/test/test_pwf.jl
@@ -2,21 +2,21 @@
     @testset "Intermediary functions" begin
         file = open(joinpath(@__DIR__,"data/pwf/test_system.pwf"))
 
-        sections = PWF._split_sections(file)
-        @test isa(sections, Vector{Vector{String}})
+        file_lines, sections = PWF._split_sections(file) # the file's lines plus a map from section name to line indices
+        @test isa(file_lines, Vector{String})
+        @test isa(sections, Dict{String, Vector{Int64}})
         @test length(sections) == 5
-        @test sections[1][1] == "TITU"
 
         data = Dict{String, Any}()
-        PWF._parse_section!(data, sections[1])
+        PWF._parse_section!(data, "TITU", sections["TITU"], file_lines) # sections are now looked up by name, not position
         @test haskey(data, "TITU")
-        PWF._parse_section!(data, sections[2])
+        PWF._parse_section!(data, "DOPC IMPR", sections["DOPC IMPR"], file_lines)
         @test haskey(data, "DOPC IMPR")
-        PWF._parse_section!(data, sections[3])
+        PWF._parse_section!(data, "DCTE", sections["DCTE"], file_lines)
         @test haskey(data, "DCTE")
-        PWF._parse_section!(data, sections[4])
+        PWF._parse_section!(data, "DBAR", sections["DBAR"], file_lines)
         @test haskey(data, "DBAR")
-        PWF._parse_section!(data, sections[5])
+        PWF._parse_section!(data, "DLIN", sections["DLIN"], file_lines)
         @test haskey(data, "DLIN")
     end