Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,8 @@ target_compile_definitions(${PROJECT_NAME}
>
)

set_property(SOURCE ${PROJECT_SOURCE_DIR}/src/input.cxx APPEND PROPERTY COMPILE_DEFINITIONS NDEBUG)

install(
TARGETS ipr
LIBRARY DESTINATION lib
Expand Down
61 changes: 59 additions & 2 deletions include/ipr/input
Original file line number Diff line number Diff line change
Expand Up @@ -37,15 +37,72 @@
ErrorCode error_code;
};

// A morsel is a piece of source text designated by an offset
// from the start of the source and its extent in bytes.
// Both fields are bit-fields packed into 64 bits: 48 bits of offset, 16 bits of length.
struct Morsel {
std::uint64_t offset : 48; // offset from the beginning of containing text
std::uint64_t length : 16; // number of bytes covered, counted from `offset`
};

// Input source file mapped to memory as sequence of raw bytes.
// UTF-8 is assumed as the encoding of the text.
struct SourceFile {
using View = std::span<const std::byte>;
using View = std::span<const char8_t>;
struct LineRange;

// Maps the file designated by the given path into memory.
explicit SourceFile(const SystemPath&);
SourceFile(SourceFile&&) noexcept;
~SourceFile();
View bytes() const { return view; }
// Range of morsels, one per physical line of this file.
LineRange lines() const noexcept;
// The entire mapped text of the file.
View contents() const noexcept { return view; }
// The portion of the text designated by the morsel `m`.
View contents(Morsel m) const noexcept;
private:
View view; // view over the mapped contents of the file
};

// A source file line range is an input_range of morsels, each representing a physical
// line in the input source file.
struct SourceFile::LineRange {
using difference_type = std::ptrdiff_t;
struct iterator;
// Binds to the file and scans its first line, after skipping a leading UTF-8 BOM if any.
explicit LineRange(const SourceFile&);
iterator begin() noexcept; // iterator bound to this range
iterator end() noexcept; // past-the-end iterator; it holds a null range pointer
private:
const SourceFile* src; // file whose lines are being enumerated
const char8_t* ptr; // start of the next line still to be scanned
Morsel cache { }; // most recently scanned line; what dereferencing an iterator yields
// Scans the line starting at `ptr` into `cache` and advances `ptr` past its terminator.
void next_line() noexcept;
};

// Input iterator over the lines of a SourceFile::LineRange.
// It points at its range while lines remain and holds a null pointer once past the end.
struct SourceFile::LineRange::iterator {
    using iterator_category = std::input_iterator_tag;
    using value_type = Morsel;
    using difference_type = std::ptrdiff_t;

    explicit iterator(LineRange* r) noexcept : range(r) { }
    // Yields the line most recently scanned by the range.
    Morsel operator*() const noexcept;
    // Moves to the next line, or becomes the past-the-end iterator.
    iterator& operator++() noexcept;
    void operator++(int) noexcept { operator++(); }
    // Two iterators are equal when they designate the same range (or are both past the end).
    bool operator==(const iterator& that) const noexcept { return that.range == range; }
    bool operator!=(const iterator& that) const noexcept = default;
private:
    LineRange* range;
};

// Returns the range of physical lines of this source file.
// NOTE(review): code scanning (PREfast, f.6) flags this function: it is declared
// 'noexcept' but calls the LineRange constructor, which is not itself declared noexcept.
inline SourceFile::LineRange SourceFile::lines() const noexcept
{
    return LineRange{*this};
}

// Iterator designating this range; dereferencing it yields the line currently cached.
inline SourceFile::LineRange::iterator SourceFile::LineRange::begin() noexcept
{
    return iterator(this);
}

// Past-the-end iterator: it designates no range at all.
inline SourceFile::LineRange::iterator SourceFile::LineRange::end() noexcept
{
    return iterator(nullptr);
}
}
73 changes: 68 additions & 5 deletions src/input.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,9 @@
# include <unistd.h>
#endif

#include <ipr/input>
#include <assert.h>
#include <iostream>
#include "ipr/input"

namespace ipr::input {
#ifdef _WIN32
Expand Down Expand Up @@ -46,13 +48,13 @@
LARGE_INTEGER s { };
if (not GetFileSizeEx(file.get_handle(), &s))
throw AccessError{ path, GetLastError() };
if (s.QuadPart)
if (s.QuadPart == 0)
return;
SystemHandle mapping = CreateFileMapping(file.get_handle(), nullptr, PAGE_READONLY, 0, 0, nullptr);
if (mapping.get_handle() == nullptr)
throw FileMappingError{ path, GetLastError() };
auto start = MapViewOfFile(mapping.get_handle(), FILE_MAP_READ, 0, 0, 0);
view = { reinterpret_cast<const std::byte*>(start), static_cast<View::size_type>(s.QuadPart) };
view = { reinterpret_cast<const char8_t*>(start), static_cast<View::size_type>(s.QuadPart) };

Check warning

Code scanning / PREfast

Don't use reinterpret_cast. A cast from void* can use static_cast (type.1). Warning

Don't use reinterpret_cast. A cast from void* can use static_cast (type.1).

Check warning

Code scanning / PREfast

Don't use a static_cast for arithmetic conversions. Use brace initialization, gsl::narrow_cast or gsl::narrow (type.1). Warning

Don't use a static_cast for arithmetic conversions. Use brace initialization, gsl::narrow_cast or gsl::narrow (type.1).
#else
struct stat s { };
errno = 0;
Expand All @@ -72,7 +74,7 @@
close(fd);
if (start == MAP_FAILED)
throw FileMappingError{ path };
view = { reinterpret_cast<std::byte*>(start), static_cast<View::size_type>(s.st_size) };
view = { reinterpret_cast<const char8_t*>(start), static_cast<View::size_type>(s.st_size) };
#endif
}

Expand All @@ -88,8 +90,69 @@
#ifdef _WIN32
UnmapViewOfFile(view.data());
#else
munmap(const_cast<std::byte*>(view.data()), view.size());
munmap(const_cast<char8_t*>(view.data()), view.size());
#endif
}
}

// Returns the bytes of this source file designated by the morsel `m`.
// Precondition (asserted when assertions are enabled): the morsel lies entirely
// within the mapped text, i.e. m.offset + m.length <= view.size(). A morsel may
// therefore span the whole file, and an empty morsel may sit at the very end.
SourceFile::View SourceFile::contents(Morsel m) const noexcept
{
    // Written as a subtraction so that the check itself cannot wrap around.
    assert(m.offset <= view.size() and m.length <= view.size() - m.offset);
    return { view.data() + m.offset, m.length };
}

// All code fragments directly indexable must have offsets and extents less than these limits.
// The limits correspond to the 48-bit offset and 16-bit length bit-fields of Morsel.
constexpr auto max_offset = std::uint64_t{1} << 48;
constexpr auto max_extent = std::uint64_t{1} << 16;

// Characters from a raw input source file marking new lines: either CR+LF or just LF.
constexpr char8_t carriage_return = 0x0D; // '\r';
constexpr char8_t line_feed = 0x0A; // '\n';

// Scans the physical line starting at `ptr` and records it in `cache` (offset from the
// start of the file, length excluding the line terminator). On return `ptr` is just past
// the terminator -- CR+LF, a lone CR, or a lone LF -- or at the end of the text when the
// last line has no terminator.
void SourceFile::LineRange::next_line() noexcept
{
    const auto offset = static_cast<std::uint64_t>(ptr - src->view.data());
    assert(offset < max_offset);
    assert(offset <= src->view.size());
    // `idx` is counted from `ptr`, not from the start of the file, so it must be bounded
    // by the number of bytes remaining after `offset`; bounding it by the size of the
    // whole file would read past the end of the mapping on an unterminated last line.
    const auto limit = src->view.size() - offset;
    std::uint64_t idx = 0;
    while (idx < limit and ptr[idx] != carriage_return and ptr[idx] != line_feed)
        ++idx;
    assert(idx < max_extent);
    cache.offset = offset;
    cache.length = idx;

    // Skip the new line marker.
    if (idx < limit)
    {
        // A CR immediately followed by LF counts as a single marker.
        if (ptr[idx] == carriage_return and idx+1 < limit and ptr[idx+1] == line_feed)
            ++idx;
        ++idx;
    }
    ptr += idx;
}

// Starts enumerating the lines of `src`: positions the cursor at the first byte of text
// (just after a leading UTF-8 byte order mark, when there is one) and scans the first line.
SourceFile::LineRange::LineRange(const SourceFile& src)
    : src{&src}, ptr{src.view.data()}
{
    // The UTF-8 encoding of the BOM is the three bytes EF BB BF.
    const bool starts_with_bom = src.view.size() >= 3
        and ptr[0] == 0xEF and ptr[1] == 0xBB and ptr[2] == 0xBF;
    if (starts_with_bom)
        ptr += 3;
    next_line();
}

// Yields the line most recently scanned by the underlying range.
// Must not be called on a past-the-end iterator.
Morsel SourceFile::LineRange::iterator::operator*() const noexcept
{
    assert(range != nullptr);
    const Morsel& current_line = range->cache;
    return current_line;
}

// Advances to the next line: scans it when text remains, otherwise turns this
// iterator into the past-the-end iterator. Must not be called once past the end.
SourceFile::LineRange::iterator& SourceFile::LineRange::iterator::operator++() noexcept
{
    assert(range != nullptr);
    const auto& text = range->src->view;
    const auto* const end_of_text = text.data() + text.size();
    if (range->ptr < end_of_text)
        range->next_line();
    else
        range = nullptr;

    return *this;
}
}
1 change: 1 addition & 0 deletions tests/unit-tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ add_executable(${TEST_BINARY}
warehouse.cxx
phased-eval.cxx
specifiers.cxx
lines.cxx
)

target_link_libraries(${TEST_BINARY}
Expand Down
26 changes: 26 additions & 0 deletions tests/unit-tests/lines.cxx
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#include "doctest/doctest.h"
#ifdef _WIN32
# include <windows.h>
# define WIDEN_(S) L ## S
# define WIDEN(S) WIDEN_(S)
#else
# define WIDEN(S) S
#endif

#include <iostream>
#include "ipr/input"

TEST_CASE("echo input file") {
ipr::input::SystemPath path = WIDEN(__FILE__);
ipr::input::SourceFile file{path};
auto n = 1; // 1-based number of the line about to be printed
std::cout << "file.size: " << file.contents().size() << std::endl;
for (auto line : file.lines())
{
std::cout << '[' << n << ']'
<< " -> {offset: " << line.offset
<< ", length: " << line.length << "}\n";
++n;
}
CHECK(n == 27); // n ends one past the line count of this very file; update it whenever lines are added or removed
}
Loading