Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/openvic-dataloader/AbstractSyntaxTree.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ namespace ovdl {
BasicAbstractSyntaxTree() = default;

explicit BasicAbstractSyntaxTree(file_type&& file)
: AbstractSyntaxTree(file.size() * file.visit_buffer([](auto&& buffer) -> size_t { return sizeof(typename std::decay_t<decltype(buffer)>::char_type); })),
: AbstractSyntaxTree(file.size() * sizeof(typename std::decay_t<decltype(file.buffer())>::char_type)),
_file { std::move(file) } {}

template<typename Encoding, typename MemoryResource = void>
Expand Down
40 changes: 18 additions & 22 deletions src/openvic-dataloader/DiagnosticLogger.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -335,23 +335,22 @@ namespace ovdl {
[[nodiscard]] Writer& annotation(AnnotationKind kind, BasicNodeLocation<LocCharT> loc, format_str<Args...> fmt, Args&&... args) {
std::basic_string<typename decltype(fmt.get())::value_type> output;

_file.visit_buffer([&](auto&& buffer) {
using char_type = typename std::decay_t<decltype(buffer)>::encoding::char_type;
lexy::buffer<lexy::utf8_char_encoding, void> const& buffer = _file.buffer();
using char_type = typename std::decay_t<decltype(buffer)>::encoding::char_type;

BasicNodeLocation<char_type> converted_loc = loc;
BasicNodeLocation<char_type> converted_loc = loc;

auto begin_loc = lexy::get_input_location(buffer, converted_loc.begin());
auto begin_loc = lexy::get_input_location(buffer, converted_loc.begin());

auto stream = _logger.make_callback_stream(output);
auto iter = _logger.make_ostream_iterator(stream);
auto stream = _logger.make_callback_stream(output);
auto iter = _logger.make_ostream_iterator(stream);

lexy_ext::diagnostic_writer _impl { buffer, { lexy::visualize_fancy } };
_impl.write_empty_annotation(iter);
_impl.write_annotation(iter, kind, begin_loc, converted_loc.end(),
[&](auto out, lexy::visualization_options) {
return lexy::_detail::write_str(out, fmt::format(fmt, std::forward<Args>(args)...).c_str());
});
});
lexy_ext::diagnostic_writer _impl { buffer, { lexy::visualize_fancy } };
_impl.write_empty_annotation(iter);
_impl.write_annotation(iter, kind, begin_loc, converted_loc.end(),
[&](auto out, lexy::visualization_options) {
return lexy::_detail::write_str(out, fmt::format(fmt, std::forward<Args>(args)...).c_str());
});

error::Annotation* annotation;
auto message = _logger.intern(output);
Expand Down Expand Up @@ -443,11 +442,9 @@ namespace ovdl {

template<std::derived_from<error::Error> T, typename... Args>
void log_with_error(T* error, DiagnosticKind kind, format_str<Args...> fmt, Args&&... args) {
file().visit_buffer(
[&](auto&& buffer) {
lexy_ext::diagnostic_writer impl { buffer };
log_with_impl(impl, error, kind, fmt, std::forward<Args>(args)...);
});
lexy::buffer<lexy::utf8_char_encoding, void> const& buffer = file().buffer();
lexy_ext::diagnostic_writer impl { buffer };
log_with_impl(impl, error, kind, fmt, std::forward<Args>(args)...);
}

template<std::derived_from<error::Error> T, typename... Args>
Expand Down Expand Up @@ -483,10 +480,9 @@ namespace ovdl {

Writer result(*this, file(), semantic);

file().visit_buffer([&](auto&& buffer) {
lexy_ext::diagnostic_writer impl { buffer };
log_with_impl(impl, semantic, kind, fmt, std::forward<Args>(args)...);
});
lexy::buffer<lexy::utf8_char_encoding, void> const& buffer = file().buffer();
lexy_ext::diagnostic_writer impl { buffer };
log_with_impl(impl, semantic, kind, fmt, std::forward<Args>(args)...);

if (kind == DiagnosticKind::error) {
_errored = true;
Expand Down
4 changes: 2 additions & 2 deletions src/openvic-dataloader/File.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@ const char* File::path() const noexcept {
}

bool File::is_valid() const noexcept {
return _buffer.index() != 0 && !_buffer.valueless_by_exception() && visit_buffer([](auto&& buffer) { return buffer.data() != nullptr; });
return _buffer.data() != nullptr;
}

std::size_t File::size() const noexcept {
return _buffer.index() != 0 && !_buffer.valueless_by_exception() ? _buffer_size : 0;
return _buffer.size();
}
90 changes: 8 additions & 82 deletions src/openvic-dataloader/File.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,11 @@
#include <cassert>
#include <concepts> // IWYU pragma: keep
#include <type_traits>
#include <variant>

#include <openvic-dataloader/NodeLocation.hpp>
#include <openvic-dataloader/detail/Utility.hpp>

#include <lexy/_detail/config.hpp>
#include <lexy/encoding.hpp>
#include <lexy/input/buffer.hpp>

Expand All @@ -33,87 +33,17 @@ namespace ovdl {

std::size_t size() const noexcept;

template<typename Encoding, typename MemoryResource = void>
constexpr bool is_buffer() const {
return buffer_ids::type_id<lexy::buffer<Encoding, MemoryResource>>() + 1 == _buffer.index();
lexy::buffer<lexy::utf8_char_encoding, void>& buffer() {
return _buffer;
}

template<typename Encoding, typename MemoryResource = void>
lexy::buffer<Encoding, MemoryResource>* try_get_buffer_as() {
return std::get_if<lexy::buffer<Encoding, MemoryResource>>(&_buffer);
lexy::buffer<lexy::utf8_char_encoding, void> const& buffer() const {
return _buffer;
}

template<typename Encoding, typename MemoryResource = void>
const lexy::buffer<Encoding, MemoryResource>* try_get_buffer_as() const {
return std::get_if<lexy::buffer<Encoding, MemoryResource>>(&_buffer);
}

template<typename Encoding, typename MemoryResource = void>
lexy::buffer<Encoding, MemoryResource>& get_buffer_as() {
assert((is_buffer<Encoding, MemoryResource>()));
return *std::get_if<lexy::buffer<Encoding, MemoryResource>>(&_buffer);
}

template<typename Encoding, typename MemoryResource = void>
const lexy::buffer<Encoding, MemoryResource>& get_buffer_as() const {
assert((is_buffer<Encoding, MemoryResource>()));
return *std::get_if<lexy::buffer<Encoding, MemoryResource>>(&_buffer);
}

#define SWITCH_LIST \
X(1) \
X(2) \
X(3) \
X(4) \
X(5) \
X(6)

#define X(NUM) \
case NUM: \
return visitor(std::get<NUM>(_buffer));

template<typename Visitor>
decltype(auto) visit_buffer(Visitor&& visitor) {
switch (_buffer.index()) {
SWITCH_LIST
case 0: return visitor(lexy::buffer<> {});
default: ovdl::detail::unreachable();
}
}

template<typename Return, typename Visitor>
Return visit_buffer(Visitor&& visitor) {
switch (_buffer.index()) {
SWITCH_LIST
case 0: return visitor(lexy::buffer<> {});
default: ovdl::detail::unreachable();
}
}

template<typename Visitor>
decltype(auto) visit_buffer(Visitor&& visitor) const {
switch (_buffer.index()) {
SWITCH_LIST
case 0: return visitor(lexy::buffer<> {});
default: ovdl::detail::unreachable();
}
}

template<typename Return, typename Visitor>
Return visit_buffer(Visitor&& visitor) const {
switch (_buffer.index()) {
SWITCH_LIST
case 0: return visitor(lexy::buffer<> {});
default: ovdl::detail::unreachable();
}
}
#undef X
#undef SWITCH_LIST

protected:
const char* _path = "";
std::size_t _buffer_size = 0;
detail::type_prepend_t<buffer_ids::variant_type, std::monostate> _buffer;
lexy::buffer<lexy::utf8_char_encoding, void> _buffer;
};

template<typename NodeT>
Expand All @@ -122,17 +52,13 @@ namespace ovdl {

BasicFile() = default;

template<typename Encoding, typename MemoryResource = void>
explicit BasicFile(const char* path, lexy::buffer<Encoding, MemoryResource>&& buffer)
explicit BasicFile(const char* path, lexy::buffer<lexy::utf8_char_encoding, void>&& buffer)
: File(path) {
_buffer_size = buffer.size();
_buffer = static_cast<std::remove_reference_t<decltype(buffer)>&&>(buffer);
}

template<typename Encoding, typename MemoryResource = void>
explicit BasicFile(lexy::buffer<Encoding, MemoryResource>&& buffer)
explicit BasicFile(lexy::buffer<lexy::utf8_char_encoding, void>&& buffer)
: File("") {
_buffer_size = buffer.size();
_buffer = static_cast<std::remove_reference_t<decltype(buffer)>&&>(buffer);
}

Expand Down
6 changes: 2 additions & 4 deletions src/openvic-dataloader/ParseState.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,12 +86,10 @@ namespace ovdl {
_logger { this->file() },
BasicParseState(encoding) {}

template<typename Encoding, typename MemoryResource = void>
FileParseState(lexy::buffer<Encoding, MemoryResource>&& buffer, detail::Encoding encoding)
FileParseState(lexy::buffer<lexy::utf8_char_encoding, void>&& buffer, detail::Encoding encoding)
: FileParseState(file_type { std::move(buffer) }, encoding) {}

template<typename Encoding, typename MemoryResource = void>
FileParseState(const char* path, lexy::buffer<Encoding, MemoryResource>&& buffer, detail::Encoding encoding)
FileParseState(const char* path, lexy::buffer<lexy::utf8_char_encoding, void>&& buffer, detail::Encoding encoding)
: FileParseState(file_type { path, std::move(buffer) }, encoding) {}

FileParseState(const FileParseState&) = delete;
Expand Down
30 changes: 10 additions & 20 deletions src/openvic-dataloader/csv/Parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,12 +39,11 @@ struct Parser::ParseHandler final : detail::BasicFileParseHandler<CsvParseState>
switch (parse_state().encoding()) {
using enum detail::Encoding;
case Ascii:
return lexy::parse<Node>(buffer<lexy::ascii_encoding>(), parse_state(), parse_state().logger().error_callback());
case Utf8:
case Windows1251:
case Windows1252:
case Gbk:
return lexy::parse<Node>(buffer<lexy::utf8_char_encoding>(), parse_state(), parse_state().logger().error_callback());
return lexy::parse<Node>(buffer(), parse_state(), parse_state().logger().error_callback());
OVDL_DEFAULT_CASE_UNREACHABLE(Unknown);
}
}();
Expand Down Expand Up @@ -211,24 +210,15 @@ const FilePosition Parser::get_error_position(const error::Error* error) const {
return {};
}

// TODO: Remove reinterpret_cast
// WARNING: This almost certainly breaks on utf16 and utf32 encodings, luckily we don't parse in that format
// This is purely to silence the node_location errors because char8_t is useless
#define REINTERPRET_IT(IT) reinterpret_cast<const std::decay_t<decltype(buffer)>::encoding::char_type*>((IT))

return _parse_handler->parse_state().file().visit_buffer(
[&](auto&& buffer) -> FilePosition {
auto loc_begin = lexy::get_input_location(buffer, REINTERPRET_IT(err_location.begin()));
FilePosition result { loc_begin.line_nr(), loc_begin.line_nr(), loc_begin.column_nr(), loc_begin.column_nr() };
if (err_location.begin() < err_location.end()) {
auto loc_end = lexy::get_input_location(buffer, REINTERPRET_IT(err_location.end()), loc_begin.anchor());
result.end_line = loc_end.line_nr();
result.end_column = loc_end.column_nr();
}
return result;
});

#undef REINTERPRET_IT
lexy::buffer<lexy::utf8_char_encoding, void> const& buffer = _parse_handler->buffer();
auto loc_begin = lexy::get_input_location(buffer, err_location.begin());
FilePosition result { loc_begin.line_nr(), loc_begin.line_nr(), loc_begin.column_nr(), loc_begin.column_nr() };
if (err_location.begin() < err_location.end()) {
auto loc_end = lexy::get_input_location(buffer, err_location.end(), loc_begin.anchor());
result.end_line = loc_end.line_nr();
result.end_column = loc_end.column_nr();
}
return result;
}

void Parser::print_errors_to(std::basic_ostream<char>& stream) const {
Expand Down
4 changes: 2 additions & 2 deletions src/openvic-dataloader/detail/InternalConcepts.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ namespace ovdl::detail {
T t,
const T ct,
typename T::ast_type::file_type&& file,
lexy::buffer<lexy::default_encoding>&& buffer,
lexy::buffer<lexy::utf8_char_encoding>&& buffer,
ovdl::detail::Encoding encoding,
const char* path //
) {
Expand All @@ -109,7 +109,7 @@ namespace ovdl::detail {
T t,
const T ct,
typename T::file_type&& file,
lexy::buffer<lexy::default_encoding>&& buffer,
lexy::buffer<lexy::utf8_char_encoding>&& buffer,
ovdl::detail::Encoding encoding,
const char* path //
) {
Expand Down
31 changes: 17 additions & 14 deletions src/openvic-dataloader/detail/ParseHandler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,17 +83,17 @@ namespace ovdl::detail {
constexpr virtual buffer_error load_buffer_impl(lexy::buffer<lexy::default_encoding>&& buffer, const char* path = "", std::optional<Encoding> fallback = std::nullopt) = 0;
virtual const char* path_impl() const = 0;

template<detail::IsStateType State, detail::IsEncoding BufferEncoding>
template<detail::IsStateType State>
static constexpr auto generate_state = [](State* state, const char* path, auto&& buffer, Encoding encoding) {
if (path[0] != '\0') {
*state = {
path,
lexy::buffer<BufferEncoding>(std::move(buffer)),
lexy::buffer<lexy::utf8_char_encoding, void>(std::move(buffer)),
encoding
};
return;
}
*state = { lexy::buffer<BufferEncoding>(std::move(buffer)), encoding };
*state = { lexy::buffer<lexy::utf8_char_encoding, void>(std::move(buffer)), encoding };
};

template<detail::IsStateType State>
Expand Down Expand Up @@ -141,12 +141,9 @@ namespace ovdl::detail {
auto [encoding, is_alone] = encoding_detect::Detector { .default_fallback = fallback.value() }.detect_assess(buffer);
switch (encoding) {
using enum Encoding;
case Ascii: {
generate_state<State, lexy::ascii_encoding>(state, path, std::move(buffer), encoding);
break;
}
case Ascii:
case Utf8: {
generate_state<State, lexy::utf8_char_encoding>(state, path, std::move(buffer), encoding);
generate_state<State>(state, path, std::move(buffer), encoding);
break;
}
case Unknown: {
Expand Down Expand Up @@ -211,9 +208,12 @@ namespace ovdl::detail {
return _parse_state;
}

template<typename Encoding>
constexpr const auto& buffer() const {
return _parse_state.file().template get_buffer_as<Encoding>();
constexpr lexy::buffer<lexy::utf8_char_encoding, void>& buffer() {
return _parse_state.file().buffer();
}

constexpr lexy::buffer<lexy::utf8_char_encoding, void> const& buffer() const {
return _parse_state.file().buffer();
}

protected:
Expand Down Expand Up @@ -248,9 +248,12 @@ namespace ovdl::detail {
return _parse_state;
}

template<typename Encoding>
constexpr const auto& buffer() const {
return _parse_state.ast().file().template get_buffer_as<Encoding>();
constexpr lexy::buffer<lexy::utf8_char_encoding, void>& buffer() {
return _parse_state.ast().file().buffer();
}

constexpr lexy::buffer<lexy::utf8_char_encoding, void> const& buffer() const {
return _parse_state.ast().file().buffer();
}

protected:
Expand Down
Loading