Skip to content

Commit 72e6288

Browse files
Add line range to input::SourceFile (#311)
1 parent f0317ab commit 72e6288

File tree

5 files changed

+156
-7
lines changed

5 files changed

+156
-7
lines changed

CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,8 @@ target_compile_definitions(${PROJECT_NAME}
9696
>
9797
)
9898

99+
set_property(SOURCE ${PROJECT_SOURCE_DIR}/src/input.cxx APPEND PROPERTY COMPILE_DEFINITIONS NDEBUG)
100+
99101
install(
100102
TARGETS ipr
101103
LIBRARY DESTINATION lib

include/ipr/input

Lines changed: 59 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,15 +37,72 @@ namespace ipr::input {
3737
ErrorCode error_code;
3838
};
3939

40+
// A morsel is a piece of source text designated by an offset from
41+
// the start of the source and its extent in bytes.
42+
struct Morsel {
43+
std::uint64_t offset : 48; // offset, in bytes, from the beginning of the containing text
44+
std::uint64_t length : 16; // number of bytes in the morsel, counted from `offset`
45+
};
46+
4047
// Input source file mapped to memory as sequence of raw bytes.
48+
// UTF-8 is assumed as the encoding of the text.
4149
struct SourceFile {
42-
using View = std::span<const std::byte>;
50+
using View = std::span<const char8_t>; // read-only view over the file contents as UTF-8 code units
51+
struct LineRange; // range over the physical lines of the file
4352

4453
explicit SourceFile(const SystemPath&);
4554
SourceFile(SourceFile&&) noexcept;
4655
~SourceFile();
47-
View bytes() const { return view; }
56+
LineRange lines() const noexcept; // the physical lines of this file, each described by a Morsel
57+
View contents() const noexcept { return view; } // the entire contents of the file
58+
View contents(Morsel m) const noexcept; // the part of the contents designated by `m`
4859
private:
4960
View view; // the mapped contents; set by the constructor, unmapped by the destructor
5061
};
62+
63+
// A source file line range is an input_range of morsels, each representing a physical
64+
// line in the input source file.
65+
struct SourceFile::LineRange {
66+
using difference_type = std::ptrdiff_t;
67+
struct iterator;
68+
explicit LineRange(const SourceFile&); // positions the range on the first line, after any UTF-8 BOM
69+
iterator begin() noexcept; // iterator referring to this range
70+
iterator end() noexcept; // past-the-end iterator (holds a null range pointer)
71+
private:
72+
const SourceFile* src; // the file being traversed
73+
const char8_t* ptr; // start of the text not yet split into lines
74+
Morsel cache { }; // the current line, as returned by iterator::operator*
75+
void next_line() noexcept; // stores the line starting at `ptr` in `cache`, then moves `ptr` past its terminator
76+
};
77+
78+
// An iterator for input source file line range.
79+
struct SourceFile::LineRange::iterator {
80+
using difference_type = std::ptrdiff_t;
81+
using value_type = Morsel;
82+
using iterator_category = std::input_iterator_tag;
83+
84+
explicit iterator(LineRange* r) noexcept : range{r} { }
85+
Morsel operator*() const noexcept;
86+
iterator& operator++() noexcept;
87+
void operator++(int) noexcept { ++(*this); }
88+
bool operator==(const iterator& that) const noexcept { return range == that.range; }
89+
bool operator!=(const iterator& that) const noexcept = default;
90+
private:
91+
LineRange* range;
92+
};
93+
94+
// Returns a LineRange constructed over this source file.
inline SourceFile::LineRange SourceFile::lines() const noexcept
95+
{
96+
return LineRange{*this};
97+
}
98+
99+
// Returns an iterator that refers to this range; it holds a pointer to the range object.
inline SourceFile::LineRange::iterator SourceFile::LineRange::begin() noexcept
100+
{
101+
return iterator{this};
102+
}
103+
104+
// Returns the past-the-end iterator, which is represented by a null range pointer.
inline SourceFile::LineRange::iterator SourceFile::LineRange::end() noexcept
105+
{
106+
return iterator{nullptr};
107+
}
51108
}

src/input.cxx

Lines changed: 68 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,9 @@
1313
# include <unistd.h>
1414
#endif
1515

16-
#include <ipr/input>
16+
#include <assert.h>
17+
#include <iostream>
18+
#include "ipr/input"
1719

1820
namespace ipr::input {
1921
#ifdef _WIN32
@@ -46,13 +48,13 @@ namespace ipr::input {
4648
LARGE_INTEGER s { };
4749
if (not GetFileSizeEx(file.get_handle(), &s))
4850
throw AccessError{ path, GetLastError() };
49-
if (s.QuadPart)
51+
if (s.QuadPart == 0)
5052
return;
5153
SystemHandle mapping = CreateFileMapping(file.get_handle(), nullptr, PAGE_READONLY, 0, 0, nullptr);
5254
if (mapping.get_handle() == nullptr)
5355
throw FileMappingError{ path, GetLastError() };
5456
auto start = MapViewOfFile(mapping.get_handle(), FILE_MAP_READ, 0, 0, 0);
55-
view = { reinterpret_cast<const std::byte*>(start), static_cast<View::size_type>(s.QuadPart) };
57+
view = { reinterpret_cast<const char8_t*>(start), static_cast<View::size_type>(s.QuadPart) };
5658
#else
5759
struct stat s { };
5860
errno = 0;
@@ -72,7 +74,7 @@ namespace ipr::input {
7274
close(fd);
7375
if (start == MAP_FAILED)
7476
throw FileMappingError{ path };
75-
view = { reinterpret_cast<std::byte*>(start), static_cast<View::size_type>(s.st_size) };
77+
view = { reinterpret_cast<const char8_t*>(start), static_cast<View::size_type>(s.st_size) };
7678
#endif
7779
}
7880

@@ -88,8 +90,69 @@ namespace ipr::input {
8890
#ifdef _WIN32
8991
UnmapViewOfFile(view.data());
9092
#else
91-
munmap(const_cast<std::byte*>(view.data()), view.size());
93+
munmap(const_cast<char8_t*>(view.data()), view.size());
9294
#endif
9395
}
9496
}
97+
98+
// Returns the part of the file contents designated by the morsel `m`.
// Precondition (checked by assert): the morsel lies entirely within the file,
// i.e. m.offset + m.length does not exceed the size of the contents. A morsel
// that spans the whole file is valid.
SourceFile::View SourceFile::contents(Morsel m) const noexcept
{
    const std::uint64_t offset = m.offset;
    const std::uint64_t length = m.length;
    // Written as two comparisons so that the bound check itself cannot wrap around.
    assert(offset <= view.size() and length <= view.size() - offset);
    return { view.data() + offset, static_cast<View::size_type>(length) };
}
103+
104+
// All code fragments directly indexable must have offsets and extents less than these limits.
105+
constexpr auto max_offset = std::uint64_t{1} << 48; // exclusive bound, matching the 48-bit Morsel::offset field
106+
constexpr auto max_extent = std::uint64_t{1} << 16; // exclusive bound, matching the 16-bit Morsel::length field
107+
108+
// Characters from a raw input source file marking new lines: either CR+LF or just LF.
// (The line scanner in this file also ends a line at a CR that is not followed by LF.)
109+
constexpr char8_t carriage_return = 0x0D; // '\r';
110+
constexpr char8_t line_feed = 0x0A; // '\n';
111+
112+
void SourceFile::LineRange::next_line() noexcept
113+
{
114+
const auto offset = static_cast<std::uint64_t>(ptr - src->view.data());
115+
assert(offset < max_offset);
116+
const auto limit = src->view.size();
117+
std::uint64_t idx = 0;
118+
while (idx < limit and ptr[idx] != carriage_return and ptr[idx] != line_feed)
119+
++idx;
120+
assert(idx < max_extent);
121+
cache.offset = offset;
122+
cache.length = idx;
123+
124+
// Skip the new line marker.
125+
if (idx < limit)
126+
{
127+
if (ptr[idx] == carriage_return and idx+1 < limit and ptr[idx+1] == line_feed)
128+
++idx;
129+
++idx;
130+
}
131+
ptr += idx;
132+
}
133+
134+
SourceFile::LineRange::LineRange(const SourceFile& src) : src{&src}, ptr{src.view.data()}
135+
{
136+
// Skip a possible misguided UTF-8 BOM.
137+
if (src.view.size() >= 3 and ptr[0] == 0xEF and ptr[1] == 0xBB and ptr[2] == 0xBF)
138+
ptr += 3;
139+
next_line();
140+
}
141+
142+
// Returns a copy of the morsel cached by the underlying range for the current line.
// Must not be called on an end iterator (null range); this is checked by assert.
Morsel SourceFile::LineRange::iterator::operator*() const noexcept
143+
{
144+
assert(range != nullptr);
145+
return range->cache;
146+
}
147+
148+
// Moves to the next line of the underlying range. Once the range's cursor has
// reached the end of the text, the iterator turns into the end iterator (null
// range) instead. Must not be called on an end iterator.
SourceFile::LineRange::iterator& SourceFile::LineRange::iterator::operator++() noexcept
{
    assert(range != nullptr);
    const auto& text = range->src->view;
    const auto* const text_end = text.data() + text.size();
    if (range->ptr < text_end)
        range->next_line();
    else
        range = nullptr;

    return *this;
}
95158
}

tests/unit-tests/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ add_executable(${TEST_BINARY}
99
warehouse.cxx
1010
phased-eval.cxx
1111
specifiers.cxx
12+
lines.cxx
1213
)
1314

1415
target_link_libraries(${TEST_BINARY}

tests/unit-tests/lines.cxx

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
#include "doctest/doctest.h"
2+
#ifdef _WIN32
3+
# include <windows.h>
4+
# define WIDEN_(S) L ## S
5+
# define WIDEN(S) WIDEN_(S)
6+
#else
7+
# define WIDEN(S) S
8+
#endif
9+
10+
#include <iostream>
11+
#include "ipr/input"
12+
13+
TEST_CASE("echo input file") {
14+
ipr::input::SystemPath path = WIDEN(__FILE__); // the test maps and reads its own source file
15+
ipr::input::SourceFile file{path};
16+
auto n = 1; // 1-based number of the line about to be printed
17+
std::cout << "file.size: " << file.contents().size() << std::endl;
18+
for (auto line : file.lines())
19+
{
20+
std::cout << '[' << n << ']'
21+
<< " -> {offset: " << line.offset
22+
<< ", length: " << line.length << "}\n";
23+
++n;
24+
}
25+
CHECK(n == 27); // n ends at (lines iterated + 1); adjust this number whenever lines are added to or removed from this file
26+
}

0 commit comments

Comments
 (0)