Skip to content

Commit b7a1e2b

Browse files
Introduce input::SourceListing (#313)
* Introduce `input::SourceListing` To hold a source file along with its logical lines. * Include `vector` * Include `utility` to access `to_underlying`
1 parent b9da7de commit b7a1e2b

File tree

3 files changed

+137
-2
lines changed

3 files changed

+137
-2
lines changed

include/ipr/input

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,8 +49,36 @@ namespace ipr::input {
4949
struct PhysicalLine {
5050
Morsel morsel { };
5151
std::uint32_t number { };
52+
53+
bool empty() const { return morsel.length == 0; }
54+
};
55+
56+
// A logical line is either a simple physical line or a composite of multiple
57+
// physical lines spliced together.
58+
enum class LineSort : std::uint8_t {
    // A single physical line that is not continued onto the next one.
    Simple = 1,
    // Several physical lines spliced together into one logical line.
    Composite = 2,
};
62+
63+
struct SimpleLine {
64+
PhysicalLine line;
65+
};
66+
67+
struct CompositeLine {
68+
std::vector<PhysicalLine> lines;
69+
};
70+
71+
struct LineIndex {
72+
LineIndex(LineSort, std::uint32_t);
73+
LineSort sort() const { return static_cast<LineSort>(srt); }
74+
std::uint32_t index() const { return idx; }
75+
private:
76+
std::uint32_t srt : 2;
77+
std::uint32_t idx : 30;
5278
};
5379

80+
static_assert(sizeof(LineIndex) == 4);
81+
5482
// Input source file mapped to memory as sequence of raw bytes.
5583
// UTF-8 is assumed as the encoding of the text.
5684
struct SourceFile {
@@ -112,4 +140,21 @@ namespace ipr::input {
112140
{
113141
return iterator{nullptr};
114142
}
143+
144+
// A depot of lines read from an input source file.
145+
struct LineDepot {
146+
std::vector<SimpleLine> simples;
147+
std::vector<CompositeLine> composites;
148+
std::vector<LineIndex> indices;
149+
};
150+
151+
// An input source listing is a source file with its lines read into logical lines.
152+
struct SourceListing : SourceFile {
153+
explicit SourceListing(const SystemPath&);
154+
const SimpleLine& simple_line(LineIndex) const;
155+
const CompositeLine& composite_line(LineIndex) const;
156+
const std::vector<LineIndex>& logical_lines() const { return depot.indices; }
157+
private:
158+
LineDepot depot;
159+
};
115160
}

src/input.cxx

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,21 @@
1515

1616
#include <assert.h>
1717
#include <iostream>
18+
#include <utility>
1819
#include "ipr/input"
1920

2021
namespace ipr::input {
22+
static constexpr std::uint32_t index_watermark { 1u << 31 };
23+
24+
25+
LineIndex::LineIndex(LineSort s, std::uint32_t i)
26+
: srt{(assert(s == LineSort::Simple || s == LineSort::Composite), std::to_underlying(s))},
27+
idx{(assert(i < index_watermark), i)}
28+
{
29+
}
30+
31+
32+
2133
#ifdef _WIN32
2234
// Helper type for automatically closing a handle on scope exit.
2335
struct SystemHandle {
@@ -109,6 +121,17 @@ namespace ipr::input {
109121
constexpr char8_t carriage_return = 0x0D; // '\r';
110122
constexpr char8_t line_feed = 0x0A; // '\n';
111123

124+
// Return true if `c` is one of the blank characters recognized here:
// space, horizontal tab, vertical tab, or form feed.
// Carriage return and line feed are not part of this set.
static inline bool white_space(char8_t c)
{
    switch (c)
    {
    // All labels are UTF-8 character literals, matching the type of `c`.
    case u8' ': case u8'\t': case u8'\v': case u8'\f':
        return true;
    default:
        return false;
    }
}
134+
112135
void SourceFile::LineRange::next_line() noexcept
113136
{
114137
const auto offset = static_cast<std::uint64_t>(ptr - src->view.data());
@@ -156,4 +179,68 @@ namespace ipr::input {
156179

157180
return *this;
158181
}
182+
183+
namespace {
184+
LineDepot read_lines(const SourceFile& src)
185+
{
186+
LineDepot depot { };
187+
const auto file_start = src.contents().data();
188+
189+
CompositeLine composite { };
190+
for (auto line: src.lines())
191+
{
192+
if (line.empty())
193+
continue;
194+
// Trim any trailing whitespace character when determining logical line continuation.
195+
const auto line_start = file_start + line.morsel.offset;
196+
auto cursor = line_start + line.morsel.length;
197+
while (--cursor > line_start and white_space(*cursor))
198+
;
199+
if (cursor <= line_start)
200+
continue; // skip entirely blank lines.
201+
if (*cursor == u8'\\')
202+
{
203+
line.morsel.length = cursor - line_start;
204+
composite.lines.push_back(line);
205+
continue;
206+
}
207+
else if (not composite.lines.empty())
208+
{
209+
composite.lines.push_back(line);
210+
auto idx = depot.composites.size();
211+
depot.composites.push_back(composite);
212+
depot.indices.emplace_back(LineSort::Composite, idx);
213+
composite.lines.clear();
214+
}
215+
else
216+
{
217+
auto idx = depot.simples.size();
218+
depot.indices.emplace_back(LineSort::Simple, idx);
219+
depot.simples.emplace_back(line);
220+
}
221+
}
222+
223+
return depot;
224+
}
225+
}
226+
227+
SourceListing::SourceListing(const SystemPath& path)
228+
: SourceFile{path}, depot{read_lines(*this)}
229+
{ }
230+
231+
// Return the simple line designated by `line`.
// In assert-enabled builds, `line` must be of sort `LineSort::Simple` and its
// position must be within the stored simple lines.
const SimpleLine& SourceListing::simple_line(LineIndex line) const
{
    assert(line.sort() == LineSort::Simple);
    const auto n = line.index();
    assert(n < depot.simples.size());
    return depot.simples[n];
}
238+
239+
// Return the composite line designated by `line`.
// In assert-enabled builds, `line` must be of sort `LineSort::Composite` and
// its position must be within the stored composite lines.
const CompositeLine& SourceListing::composite_line(LineIndex line) const
{
    assert(line.sort() == LineSort::Composite);
    const auto n = line.index();
    assert(n < depot.composites.size());
    return depot.composites[n];
}
159246
}

tests/unit-tests/lines.cxx

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,15 @@
77
# define WIDEN(S) S
88
#endif
99

10+
#define DUP(S) \
11+
S ## S
12+
1013
#include <iostream>
1114
#include "ipr/input"
1215

1316
TEST_CASE("echo input file") {
1417
ipr::input::SystemPath path = WIDEN(__FILE__);
15-
ipr::input::SourceFile file{path};
18+
ipr::input::SourceListing file{path};
1619
std::cout << "file.size: " << file.contents().size() << std::endl;
1720
std::uint32_t last_line_number = 0;
1821
for (auto line : file.lines())
@@ -22,5 +25,5 @@ TEST_CASE("echo input file") {
2225
<< ", length: " << line.morsel.length << "}\n";
2326
last_line_number = line.number;
2427
}
25-
CHECK(last_line_number == 26); // Adjust this number based on the actual number of lines in the file
28+
CHECK(last_line_number == 29); // Adjust this number based on the actual number of lines in the file
2629
}

0 commit comments

Comments
 (0)