Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .clang-format
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@ AlignTrailingComments: true
AlignEscapedNewlines: Left
AlignAfterOpenBracket: DontAlign
AccessModifierOffset: -4
Macros:
- "OVDL_DEFAULT_CASE_UNREACHABLE(OPTION)=default: ovdl::detail::unreachable()"
IncludeCategories:
- Regex: <[[:alnum:]_]+>
Priority: 1
Expand Down
2 changes: 1 addition & 1 deletion include/openvic-dataloader/detail/Encoding.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
#include <cstdint>

namespace ovdl::detail {
enum class Encoding : std::int8_t {
enum class Encoding : std::uint8_t {
Unknown,
Ascii,
Utf8,
Expand Down
19 changes: 19 additions & 0 deletions include/openvic-dataloader/detail/Utility.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,25 @@

#include <openvic-dataloader/detail/Concepts.hpp>

// OVDL_DEFAULT_CASE_UNREACHABLE(...): switch label that calls ovdl::detail::unreachable().
// Intended to be written inside a switch body, followed by a `;` at the call site.
//
// The expansion depends on whether DEBUG_ENABLED is defined:
//  - defined:     expands to `case <args> : ovdl::detail::unreachable()`, or to nothing at all
//                 when the macro is invoked without arguments; no `default:` label is emitted.
//  - not defined: the arguments are ignored and `default: ovdl::detail::unreachable()` is emitted.
//
// NOTE(review): presumably the debug form avoids `default:` so the compiler can still warn about
// enumerators the switch does not handle -- confirm against the build's warning flags.
// NOTE(review): the arguments are pasted verbatim after `case`, so this appears to expect a single
// case constant -- confirm before passing more than one.
#ifdef DEBUG_ENABLED
// Debug form: explicit `case` label built from the arguments (empty expansion without arguments).
#define OVDL_DEFAULT_CASE_UNREACHABLE(...) \
__VA_OPT__(case __VA_ARGS__ : ovdl::detail::unreachable())
#else
// Non-debug form: catch-all `default:` label; the arguments are unused.
#define OVDL_DEFAULT_CASE_UNREACHABLE(...) \
default: ovdl::detail::unreachable()
#endif

// OVDL_BEGIN_IGNORE_WARNING_RETURN_TYPE / OVDL_END_IGNORE_WARNING_RETURN_TYPE:
// bracket a region of code in which the "-Wreturn-type" diagnostic is ignored.
//
// When __GNUC__ is defined, BEGIN saves the current diagnostic state ("GCC diagnostic push") and
// then ignores -Wreturn-type; END restores the saved state ("GCC diagnostic pop"). Because END pops
// what BEGIN pushed, the two macros must always be used as a matched pair.
// When __GNUC__ is not defined, both macros expand to nothing.
#ifdef __GNUC__
// Push the diagnostic state, then silence -Wreturn-type until the matching END macro.
#define OVDL_BEGIN_IGNORE_WARNING_RETURN_TYPE \
_Pragma("GCC diagnostic push") \
_Pragma("GCC diagnostic ignored \"-Wreturn-type\"")
// Restore the diagnostic state saved by the matching BEGIN macro.
#define OVDL_END_IGNORE_WARNING_RETURN_TYPE \
_Pragma("GCC diagnostic pop")
#else
// No __GNUC__: both macros are defined empty so call sites compile unchanged.
#define OVDL_BEGIN_IGNORE_WARNING_RETURN_TYPE
#define OVDL_END_IGNORE_WARNING_RETURN_TYPE
#endif

#if __has_cpp_attribute(msvc::no_unique_address)
#define OVDL_NO_UNIQUE_ADDRESS \
_Pragma("warning(push)") _Pragma("warning(disable : 4848)") \
Expand Down
1 change: 1 addition & 0 deletions src/openvic-dataloader/File.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ namespace ovdl {
struct File {
using buffer_ids = detail::TypeRegister<
lexy::buffer<lexy::default_encoding, void>,
lexy::buffer<lexy::ascii_encoding, void>,
lexy::buffer<lexy::utf8_char_encoding, void>,
lexy::buffer<lexy::utf8_encoding, void>,
lexy::buffer<lexy::utf16_encoding, void>,
Expand Down
126 changes: 24 additions & 102 deletions src/openvic-dataloader/csv/CsvGrammar.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
#include <initializer_list>
#include <string>
#include <tuple>
#include <type_traits>
#include <vector>

#include <openvic-dataloader/csv/LineObject.hpp>
Expand All @@ -14,9 +13,7 @@
#include <lexy/dsl.hpp>
#include <lexy/encoding.hpp>

#include "detail/Convert.hpp"
#include "detail/InternalConcepts.hpp"
#include "detail/dsl.hpp"

// Grammar Definitions //
namespace ovdl::csv::grammar {
Expand All @@ -38,21 +35,6 @@ namespace ovdl::csv::grammar {
}
};

constexpr bool IsUtf8(auto encoding) {
return std::same_as<std::decay_t<decltype(encoding)>, lexy::utf8_char_encoding>;
}

template<ParseOptions Options, typename String>
constexpr auto convert_as_string = convert::convert_as_string<
String,
ConvertErrorHandler>;

constexpr auto ansi_character = lexy::dsl::ascii::character / dsl::lit_b_range<0x80, 0xFF>;
constexpr auto ansi_control =
lexy::dsl::ascii::control /
lexy::dsl::lit_b<0x81> / lexy::dsl::lit_b<0x8D> / lexy::dsl::lit_b<0x8F> /
lexy::dsl::lit_b<0x90> / lexy::dsl::lit_b<0x9D>;

constexpr auto utf_character = lexy::dsl::unicode::character;
constexpr auto utf_control = lexy::dsl::unicode::control;

Expand All @@ -75,47 +57,20 @@ namespace ovdl::csv::grammar {

template<ParseOptions Options>
struct CsvGrammar {
struct StringValue : lexy::scan_production<std::string>,
lexy::token_production {

template<typename Context, typename Reader>
static constexpr scan_result scan(lexy::rule_scanner<Context, Reader>& scanner, detail::IsFileParseState auto& state) {
using encoding = typename Reader::encoding;

constexpr auto rule = [] {
// Arbitrary code points
auto c = [] {
if constexpr (std::same_as<encoding, lexy::default_encoding> || std::same_as<encoding, lexy::byte_encoding>) {
return ansi_character - ansi_control;
} else {
return utf_character - utf_control;
}
}();

auto back_escape = lexy::dsl::backslash_escape //
.symbol<escaped_symbols>();

auto quote_escape = lexy::dsl::escape(lexy::dsl::lit_c<'"'>) //
.template symbol<escaped_quote>();

return lexy::dsl::delimited(lexy::dsl::lit_c<'"'>, lexy::dsl::not_followed_by(lexy::dsl::lit_c<'"'>, lexy::dsl::lit_c<'"'>))(c, back_escape, quote_escape);
}();

lexy::scan_result<std::string> str_result = scanner.template parse<std::string>(rule);
if (!scanner || !str_result) {
return lexy::scan_failed;
}
return str_result.value();
}
struct StringValue : lexy::token_production {
static constexpr auto rule = [] {
auto quote = lexy::dsl::lit_c<'"'>;
auto c = utf_character - utf_control;
auto back_escape = lexy::dsl::backslash_escape.symbol<escaped_symbols>();
auto quote_escape = lexy::dsl::escape(lexy::dsl::lit_c<'"'>).template symbol<escaped_quote>();

static constexpr auto rule = lexy::dsl::peek(lexy::dsl::lit_c<'"'>) >> lexy::dsl::scan;
return lexy::dsl::delimited(quote, lexy::dsl::not_followed_by(quote, quote))(c, back_escape, quote_escape);
}();

static constexpr auto value = convert_as_string<Options, std::string> >> lexy::forward<std::string>;
static constexpr auto value = lexy::as_string<std::string>;
};

struct PlainValue : lexy::scan_production<std::string>,
lexy::token_production {

struct PlainValue : lexy::token_production {
template<auto character>
static constexpr auto _escape_check = character - (lexy::dsl::lit_b<Options.SepChar> / lexy::dsl::ascii::newline);

Expand All @@ -124,57 +79,24 @@ namespace ovdl::csv::grammar {
static constexpr auto value = lexy::constant('\n');
};

template<typename Context, typename Reader>
static constexpr scan_result scan(lexy::rule_scanner<Context, Reader>& scanner, detail::IsFileParseState auto& state) {
using encoding = typename Reader::encoding;

constexpr auto rule = [] {
constexpr auto character = [] {
if constexpr (std::same_as<encoding, lexy::default_encoding> || std::same_as<encoding, lexy::byte_encoding>) {
return ansi_character;
} else {
return utf_character;
}
}();

if constexpr (Options.SupportStrings) {
return lexy::dsl::identifier(character - (lexy::dsl::lit_b<Options.SepChar> / lexy::dsl::ascii::newline));
} else {
constexpr auto backslash = lexy::dsl::lit_b<'\\'>;

constexpr auto escape_check_char = _escape_check<character>;
constexpr auto escape_rule = lexy::dsl::p<Backslash>;

return lexy::dsl::list(
lexy::dsl::identifier(escape_check_char - backslash) |
escape_rule |
lexy::dsl::capture(escape_check_char) //
);
}
}();

static constexpr auto rule = [] {
if constexpr (Options.SupportStrings) {
auto lexeme_result = scanner.template parse<lexy::lexeme<Reader>>(rule);
if (!scanner || !lexeme_result) {
return lexy::scan_failed;
}
return std::string { lexeme_result.value().begin(), lexeme_result.value().end() };
return lexy::dsl::identifier(utf_character - (lexy::dsl::lit_b<Options.SepChar> / lexy::dsl::ascii::newline));
} else {
lexy::scan_result<std::string> str_result = scanner.template parse<std::string>(rule);
if (!scanner || !str_result) {
return lexy::scan_failed;
}
return str_result.value();
}
}
constexpr auto backslash = lexy::dsl::lit_b<'\\'>;

static constexpr auto rule =
dsl::peek(
_escape_check<ansi_character>,
_escape_check<utf_character>) >>
lexy::dsl::scan;
constexpr auto escape_check_char = _escape_check<utf_character>;
constexpr auto escape_rule = lexy::dsl::p<Backslash>;

return lexy::dsl::list(
lexy::dsl::identifier(escape_check_char - backslash) |
escape_rule |
lexy::dsl::capture(escape_check_char) //
);
}
}();

static constexpr auto value = convert_as_string<Options, std::string> >> lexy::forward<std::string>;
static constexpr auto value = lexy::as_string<std::string>;
};

struct Value {
Expand Down
16 changes: 11 additions & 5 deletions src/openvic-dataloader/csv/Parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,20 +29,26 @@ using namespace ovdl::csv;
struct Parser::ParseHandler final : detail::BasicFileParseHandler<CsvParseState> {
template<typename Node>
std::optional<DiagnosticLogger::error_range> parse() {
if (parse_state().encoding() == detail::Encoding::Unknown) {
parse_state().logger().error("tried to parse unknown encoding");
return parse_state().logger().get_errors();
}

OVDL_BEGIN_IGNORE_WARNING_RETURN_TYPE
auto result = [&] {
switch (parse_state().encoding()) {
using enum detail::Encoding;
case Ascii:
return lexy::parse<Node>(buffer<lexy::ascii_encoding>(), parse_state(), parse_state().logger().error_callback());
case Utf8:
return lexy::parse<Node>(buffer<lexy::utf8_char_encoding>(), parse_state(), parse_state().logger().error_callback());
case Unknown:
case Windows1251:
case Windows1252:
return lexy::parse<Node>(buffer<lexy::default_encoding>(), parse_state(), parse_state().logger().error_callback());
default:
ovdl::detail::unreachable();
return lexy::parse<Node>(buffer<lexy::utf8_char_encoding>(), parse_state(), parse_state().logger().error_callback());
OVDL_DEFAULT_CASE_UNREACHABLE(Unknown);
}
}();
OVDL_END_IGNORE_WARNING_RETURN_TYPE

if (!result) {
return this->parse_state().logger().get_errors();
}
Expand Down
Loading