Skip to content
Draft
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions velox/type/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ endif()
add_subdirectory(parser)
add_subdirectory(tz)
add_subdirectory(fbhive)
add_subdirectory(fbclp)

velox_add_library(
velox_type
Expand Down
45 changes: 45 additions & 0 deletions velox/type/fbclp/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# Copyright (c) Facebook, Inc. and its affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Everything below is only configured when the CLP connector is enabled.
if(${VELOX_ENABLE_CLP_CONNECTOR})
# Tests live in the tests/ subdirectory and are only added for testing builds.
if(${VELOX_BUILD_TESTING})
add_subdirectory(tests)
endif()

# Generate the parser (ClpTypeParser.yy.cc plus the ClpTypeParser.yy.h header)
# from the Bison grammar into the current binary directory.
bison_target(
ClpTypeParser ClpTypeParser.yy
${CMAKE_CURRENT_BINARY_DIR}/ClpTypeParser.yy.cc
DEFINES_FILE ${CMAKE_CURRENT_BINARY_DIR}/ClpTypeParser.yy.h
COMPILE_FLAGS "-Werror -Wno-deprecated")

# Generate the scanner (ClpScanner.cpp) from the Flex specification. The
# --prefix option gives the generated lexer symbols the 'veloxtpclp' prefix.
flex_target(
ClpTypeParserScanner ClpTypeParser.ll
${CMAKE_CURRENT_BINARY_DIR}/ClpScanner.cpp
COMPILE_FLAGS "-Cf --prefix=veloxtpclp")

# The scanner includes the Bison-generated header, so the scanner target must
# depend on the parser target.
add_flex_bison_dependency(ClpTypeParserScanner ClpTypeParser)

# In mono-library builds, make the 'velox' target depend on a custom target
# that produces the generated sources.
# NOTE(review): presumably needed so the sources exist before 'velox' is
# compiled - confirm against velox_add_library.
if(VELOX_MONO_LIBRARY)
add_custom_target(
velox_type_fbclp_parser_gen_src
DEPENDS ${BISON_ClpTypeParser_OUTPUTS}
${FLEX_ClpTypeParserScanner_OUTPUTS})
add_dependencies(velox velox_type_fbclp_parser_gen_src)
endif()
# The parser library is built from the generated parser and scanner sources
# plus the hand-written helpers in ClpParserUtil.cpp.
velox_add_library(velox_type_fbclp_parser ${BISON_ClpTypeParser_OUTPUTS}
${FLEX_ClpTypeParserScanner_OUTPUTS} ClpParserUtil.cpp)
# PROJECT_BINARY_DIR is on the include path so the generated header can be
# included as "velox/type/fbclp/ClpTypeParser.yy.h".
velox_include_directories(velox_type_fbclp_parser
PRIVATE ${PROJECT_BINARY_DIR} ${FLEX_INCLUDE_DIRS})
velox_link_libraries(velox_type_fbclp_parser velox_common_base)
endif()
68 changes: 68 additions & 0 deletions velox/type/fbclp/ClpParserUtil.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <algorithm>
#include <cctype>
#include <string>
#include "velox/type/Type.h"

namespace facebook::velox::type::fbclp {

/// Converts a type name to a Velox type.
///
/// The name is upper-cased, the aliases "INT" and "DOUBLE PRECISION" are
/// rewritten to "INTEGER" and "DOUBLE", and the result is looked up with
/// getType() using no type parameters.
///
/// @param type Type name to resolve; matching is case-insensitive.
/// @param failIfNotRegistered If true, an unsupported-type error is raised
/// when the lookup yields no type; if false, nullptr is returned instead.
/// @return The resolved type, or nullptr when the lookup fails and
/// 'failIfNotRegistered' is false.
TypePtr typeFromString(
    const std::string& type,
    bool failIfNotRegistered = true) {
  auto upper = type;
  // Go through unsigned char: calling toupper with a negative char value
  // (any non-ASCII byte where char is signed) is undefined behavior.
  std::transform(
      upper.begin(), upper.end(), upper.begin(), [](unsigned char c) {
        return static_cast<char>(std::toupper(c));
      });
  if (upper == "INT") {
    upper = "INTEGER";
  } else if (upper == "DOUBLE PRECISION") {
    upper = "DOUBLE";
  }
  auto inferredType = getType(upper, {});
  if (failIfNotRegistered && inferredType == nullptr) {
    // Report the name as the caller spelled it, not the normalized form.
    VELOX_UNSUPPORTED("Failed to parse type [{}]. Type not registered.", type);
  }
  return inferredType;
}

/// Resolves the custom type 'name' parameterized by 'children'.
///
/// Every child type is wrapped in a TypeParameter, in order, and the
/// resulting list is handed to getType(). The call fails if getType() returns
/// no type.
TypePtr customTypeWithChildren(
    const std::string& name,
    const std::vector<TypePtr>& children) {
  std::vector<TypeParameter> typeParams;
  typeParams.reserve(children.size());
  for (const auto& childType : children) {
    typeParams.emplace_back(childType);
  }

  auto type = getType(name, typeParams);
  VELOX_CHECK_NOT_NULL(
      type, "Failed to parse custom type with children [{}]", name);
  return type;
}

/// Interprets a sequence of at least two words either as a multi-word type
/// name or as a field name followed by a type name.
///
/// The words are joined with single spaces and first resolved as a whole; on
/// success the returned field name is empty. When 'cannotHaveFieldName' is
/// true that first lookup is required to succeed. Otherwise the first word is
/// returned as the field name and the rest of the joined text is resolved as
/// the type.
std::pair<std::string, std::shared_ptr<const Type>> inferTypeWithSpaces(
    std::vector<std::string>& words,
    bool cannotHaveFieldName = false) {
  VELOX_CHECK_GE(words.size(), 2);
  const auto joined = folly::join(" ", words);

  // The flag doubles as 'failIfNotRegistered': if no field name is allowed,
  // the whole text has to be a registered type.
  if (auto wholeType = typeFromString(joined, cannotHaveFieldName)) {
    return std::make_pair("", wholeType);
  }

  // Drop the first word and the space that follows it; what remains must be
  // a type name.
  const auto& leadingWord = words[0];
  return std::make_pair(
      leadingWord, typeFromString(joined.substr(leadingWord.size() + 1)));
}

} // namespace facebook::velox::type::fbclp
43 changes: 43 additions & 0 deletions velox/type/fbclp/ClpParserUtil.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <string>
#include "velox/type/Type.h"

namespace facebook::velox::type::fbclp {

/// Normalizes Presto type names such as INT and DOUBLE PRECISION and converts
/// them to a Velox type.
///
/// The name is matched case-insensitively: it is upper-cased, INT becomes
/// INTEGER and DOUBLE PRECISION becomes DOUBLE, and the result is looked up
/// as a type without parameters.
///
/// @param type Type name to convert.
/// @param failIfNotRegistered If true, an error is raised when no such type is
/// registered; if false, nullptr is returned in that case.
/// @return The matching type, or nullptr (only when 'failIfNotRegistered' is
/// false).
TypePtr typeFromString(
const std::string& type,
bool failIfNotRegistered = true);

/// Looks up the custom type 'name' using 'children' as its type parameters.
/// Each child is passed as one type parameter, in the given order. Fails if no
/// type can be resolved for that name and those parameters.
///
/// @param name Name of the custom type.
/// @param children Child types used as the type's parameters.
/// @return The resolved type; never null.
TypePtr customTypeWithChildren(
const std::string& name,
const std::vector<TypePtr>& children);

/// Converts words separated by spaces to a Velox type.
/// The words are joined with single spaces. First check if all the words
/// together are a Velox type; if so, the returned field name is empty.
/// Then check if the first word is a field name and the remaining words are a
/// Velox type. If cannotHaveFieldName = true, then all words must be a Velox
/// type, and an error is raised otherwise.
///
/// @param words At least two words; fewer is an error.
/// @param cannotHaveFieldName If true, the first word may not be treated as a
/// field name.
/// @return A pair of (field name, type).
std::pair<std::string, TypePtr> inferTypeWithSpaces(
std::vector<std::string>& words,
bool cannotHaveFieldName = false);

} // namespace facebook::velox::type::fbclp
55 changes: 55 additions & 0 deletions velox/type/fbclp/ClpScanner.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once

#include <cmath>
#include <iostream>
#include <sstream>
#include <string>
#include <unordered_map>

#include "velox/common/base/Exceptions.h"
#include "velox/type/Type.h"

namespace facebook::velox::type::fbclp {

/// Scanner for CLP type strings, built on the Flex C++ lexer base class.
///
/// The scanner does not own its result or its input text: it keeps a
/// reference to the caller's output type and a view of the input string. Both
/// must outlive the scanner.
class ClpScanner : public yyFlexLexer {
 public:
  /// @param arg_yyin Stream the lexer reads its input from.
  /// @param arg_yyout Stream passed to the lexer as its output stream.
  /// @param outputType Destination that setType() writes the result to.
  /// @param input The text being parsed, kept only for error messages.
  ClpScanner(
      std::istream& arg_yyin,
      std::ostream& arg_yyout,
      TypePtr& outputType,
      const std::string_view input)
      : yyFlexLexer(&arg_yyin, &arg_yyout),
        outputType_(outputType),
        input_(input) {}

  /// Returns the next token and stores its semantic value in 'yylval'.
  /// Only declared here; the definition is expected to come from the
  /// Flex-generated scanner source.
  int lex(ClpParser::semantic_type* yylval);

  /// Stores 'type' into the output type supplied at construction.
  void setType(TypePtr type) {
    outputType_ = std::move(type);
  }

  // Store input to print it as part of the error message.
  std::string_view input() const {
    return input_;
  }

 private:
  // Caller-owned destination for the parse result.
  TypePtr& outputType_;
  // Non-owning view of the text being parsed.
  const std::string_view input_;
};

} // namespace facebook::velox::type::fbclp
37 changes: 37 additions & 0 deletions velox/type/fbclp/ClpTypeParser.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <string>
#include "velox/type/Type.h"

namespace facebook::velox::type::fbclp {

/// Parses a type string in Presto format to a Velox type.
/// Example type strings:
/// row(col0 bigint, varchar)
/// array(bigint)
/// map(bigint, array(bigint))
/// function(bigint,bigint,bigint)
/// The parsing is case-insensitive, i.e. 'Row' and 'row' are equal.
/// Field names for rows are optional.
/// Quoted field names are supported.
/// All custom types need to be registered. An error is thrown otherwise.
/// Uses the Type::getType API to convert a string to Velox type.
///
/// @param typeText The type string to parse.
/// @return The parsed type; never null. An error is raised if parsing does not
/// produce a type.
TypePtr parseClpType(const std::string& typeText);

} // namespace facebook::velox::type::fbclp
76 changes: 76 additions & 0 deletions velox/type/fbclp/ClpTypeParser.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
%{
#include <vector>
#include <memory>

#include "velox/type/fbclp/ClpTypeParser.yy.h" // @manual
#include "velox/type/fbclp/ClpScanner.h"
// Emit the generated scanning routine as ClpScanner::lex(), taking the
// parser's semantic value as its argument, instead of the default yylex().
#define YY_DECL int facebook::velox::type::fbclp::ClpScanner::lex(facebook::velox::type::fbclp::ClpParser::semantic_type *yylval)
%}

%option c++ noyywrap noyylineno nodefault caseless

/* The scanner is generated with the 'caseless' option, so patterns already
 * match regardless of case and no per-letter definitions are needed. The
 * previous single-letter definitions (written as [A|a], a character class
 * that also matches a literal '|') were not referenced by any rule and have
 * been removed. */

/* WORD: letters, digits and the characters _ - @ # $ \ */
WORD ([[:alpha:][:alnum:]_\-@#\$\\]*)
/* QUOTED_ID: letters, digits, whitespace and underscores between quotes. */
QUOTED_ID (['"'][[:alnum:][:space:]_]*['"'])
NUMBER ([[:digit:]]+)
VARIABLE (VARCHAR|VARBINARY)

%%

"(" return ClpParser::token::LPAREN;
")" return ClpParser::token::RPAREN;
"," return ClpParser::token::COMMA;
(ARRAY) return ClpParser::token::ARRAY;
(MAP) return ClpParser::token::MAP;
(FUNCTION) return ClpParser::token::FUNCTION;
(DECIMAL) return ClpParser::token::DECIMAL;
(ROW) return ClpParser::token::ROW;
{VARIABLE} yylval->build<std::string>(YYText()); return ClpParser::token::VARIABLE; /* the matched text is the semantic value */
{NUMBER} yylval->build<long long>(folly::to<int>(YYText())); return ClpParser::token::NUMBER; /* digits are converted with folly::to<int>, then stored as long long */
{WORD} yylval->build<std::string>(YYText()); return ClpParser::token::WORD; /* the matched text is the semantic value */
{QUOTED_ID} yylval->build<std::string>(YYText()); return ClpParser::token::QUOTED_ID; /* the matched text is stored as-is, quotes included */
<<EOF>> return ClpParser::token::YYEOF;
. /* No action: a character that no rule above matches is discarded and scanning continues. */

%%

// Definition of the base-class scanning entry point. Scanning is done through
// ClpScanner::lex() (see YY_DECL), so reaching this function is an error and
// it always throws std::runtime_error.
int yyFlexLexer::yylex() {
throw std::runtime_error("Bad call to yyFlexLexer::yylex()");
}

#include "velox/type/fbclp/ClpTypeParser.h"

// Parses 'typeText' into a Velox type by running the generated parser over a
// scanner that reads from the text. Fails if the parser leaves the result
// unset.
facebook::velox::TypePtr facebook::velox::type::fbclp::parseClpType(const std::string& typeText)
{
  namespace clp = facebook::velox::type::fbclp;

  // Written by the scanner's setType(); stays null unless a type is set.
  facebook::velox::TypePtr type;

  std::istringstream source(typeText);
  // Sink for the lexer's output stream; its contents are never read.
  std::ostringstream sink;

  // The scanner also receives the raw text so it is available for error
  // messages.
  clp::ClpScanner lexer(source, sink, type, typeText);
  clp::ClpParser grammar(&lexer);
  grammar.parse();

  VELOX_CHECK(type, "Failed to parse type [{}]", typeText);
  return type;
}
Loading
Loading