1+ /*
2+ Copyright 2024 Huawei Technologies Co., Ltd.
3+
4+ Licensed under the Apache License, Version 2.0 (the "License");
5+ you may not use this file except in compliance with the License.
6+ You may obtain a copy of the License at
7+
8+ http://www.apache.org/licenses/LICENSE-2.0
9+
10+ Unless required by applicable law or agreed to in writing, software
11+ distributed under the License is distributed on an "AS IS" BASIS,
12+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+ See the License for the specific language governing permissions and
14+ limitations under the License.
15+
16+ @author Toni Boehnlein, Christos Matzoros, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner
17+ */
18+
19+ #pragma once
20+
#include <filesystem>
#include <fstream>
#include <iostream>
#include <limits>
#include <sstream>
#include <string>
#include <system_error>
#include <vector>

#include "osp/auxiliary/io/filepath_checker.hpp"
#include "osp/partitioning/model/hypergraph.hpp"
31+
32+ namespace osp {
33+ namespace file_reader {
34+
35+ // reads a matrix into Hypergraph format, where nonzeros are vertices, and rows/columns are hyperedges
36+ bool readHypergraphMartixMarketFormat (std::ifstream& infile, Hypergraph& hgraph) {
37+
38+ std::string line;
39+
40+ // Skip comments or empty lines (robustly)
41+ while (std::getline (infile, line)) {
42+ if (line.empty () || line[0 ] == ' %' ) continue ;
43+
44+ // Null byte check
45+ if (line.find (' \0 ' ) != std::string::npos) {
46+ std::cerr << " Error: Null byte detected in header line.\n " ;
47+ return false ;
48+ }
49+
50+ if (line.size () > MAX_LINE_LENGTH) {
51+ std::cerr << " Error: Line too long, possible malformed or malicious file.\n " ;
52+ return false ;
53+ }
54+ break ; // We found the actual header line
55+ }
56+
57+ if (infile.eof ()) {
58+ std::cerr << " Error: Unexpected end of file while reading header.\n " ;
59+ return false ;
60+ }
61+
62+ int M_row = 0 , M_col = 0 , nEntries = 0 ;
63+
64+ std::istringstream header_stream (line);
65+ if (!(header_stream >> M_row >> M_col >> nEntries) ||
66+ M_row <= 0 || M_col <= 0 ) {
67+ std::cerr << " Error: Invalid header.\n " ;
68+ return false ;
69+ }
70+
71+ const unsigned num_nodes = static_cast <unsigned >(nEntries);
72+ if (num_nodes > std::numeric_limits<unsigned >::max ()) {
73+ std::cerr << " Error: Matrix dimension too large for vertex type.\n " ;
74+ return false ;
75+ }
76+
77+ std::vector<int > node_work_wts (num_nodes, 0 );
78+ std::vector<int > node_comm_wts (num_nodes, 1 );
79+
80+ hgraph.reset (num_nodes, 0 );
81+ for (unsigned node = 0 ; node < num_nodes; ++node) {
82+ hgraph.set_vertex_weight (node, 1 );
83+ }
84+
85+ std::vector<std::vector<unsigned >> row_hyperedges (static_cast <unsigned >(M_row));
86+ std::vector<std::vector<unsigned >> column_hyperedges (static_cast <unsigned >(M_col));
87+
88+ int entries_read = 0 ;
89+ while (entries_read < nEntries && std::getline (infile, line)) {
90+ if (line.empty () || line[0 ] == ' %' ) continue ;
91+ if (line.size () > MAX_LINE_LENGTH) {
92+ std::cerr << " Error: Line too long.\n " ;
93+ return false ;
94+ }
95+
96+ std::istringstream entry_stream (line);
97+ int row = -1 , col = -1 ;
98+ double val = 0.0 ;
99+
100+ if (!(entry_stream >> row >> col >> val)) {
101+ std::cerr << " Error: Malformed matrix entry.\n " ;
102+ return false ;
103+ }
104+
105+ row -= 1 ; col -= 1 ; // Convert to 0-based
106+
107+ if (row < 0 || col < 0 || row >= M_row || col >= M_col) {
108+ std::cerr << " Error: Matrix entry out of bounds.\n " ;
109+ return false ;
110+ }
111+
112+ if (static_cast <unsigned >(row) >= num_nodes || static_cast <unsigned >(col) >= num_nodes) {
113+ std::cerr << " Error: Index exceeds vertex type limit.\n " ;
114+ return false ;
115+ }
116+
117+ row_hyperedges[static_cast <unsigned >(row)].push_back (static_cast <unsigned >(entries_read));
118+ column_hyperedges[static_cast <unsigned >(col)].push_back (static_cast <unsigned >(entries_read));
119+
120+ ++entries_read;
121+ }
122+
123+ if (entries_read != nEntries) {
124+ std::cerr << " Error: Incomplete matrix entries.\n " ;
125+ return false ;
126+ }
127+
128+ while (std::getline (infile, line)) {
129+ if (!line.empty () && line[0 ] != ' %' ) {
130+ std::cerr << " Error: Extra data after matrix content.\n " ;
131+ return false ;
132+ }
133+ }
134+
135+ for (unsigned row = 0 ; row < static_cast <unsigned >(M_row); ++row)
136+ if (!row_hyperedges[row].empty ())
137+ hgraph.add_hyperedge (row_hyperedges[row]);
138+
139+ for (unsigned col = 0 ; col < static_cast <unsigned >(M_col); ++col)
140+ if (!column_hyperedges[col].empty ())
141+ hgraph.add_hyperedge (column_hyperedges[col]);
142+
143+ return true ;
144+ }
145+
146+ bool readHypergraphMartixMarketFormat (const std::string& filename, Hypergraph& hgraph) {
147+ // Ensure the file is .mtx format
148+ if (std::filesystem::path (filename).extension () != " .mtx" ) {
149+ std::cerr << " Error: Only .mtx files are accepted.\n " ;
150+ return false ;
151+ }
152+
153+ if (!isPathSafe (filename)) {
154+ std::cerr << " Error: Unsafe file path (potential traversal attack).\n " ;
155+ return false ;
156+ }
157+
158+ if (std::filesystem::is_symlink (filename)) {
159+ std::cerr << " Error: Symbolic links are not allowed.\n " ;
160+ return false ;
161+ }
162+
163+ if (!std::filesystem::is_regular_file (filename)) {
164+ std::cerr << " Error: Input is not a regular file.\n " ;
165+ return false ;
166+ }
167+
168+ std::ifstream infile (filename);
169+ if (!infile.is_open ()) {
170+ std::cerr << " Error: Failed to open file.\n " ;
171+ return false ;
172+ }
173+
174+ return readHypergraphMartixMarketFormat (infile, hgraph);
175+ }
176+
} // namespace file_reader
178+
179+ } // namespace osp
0 commit comments