Skip to content
This repository was archived by the owner on Sep 27, 2019. It is now read-only.

Commit bd4c2a7

Browse files
committed
Get string inputs working
1 parent 92034f2 commit bd4c2a7

File tree

8 files changed

+52
-10
lines changed

8 files changed

+52
-10
lines changed

src/codegen/operator/csv_scan_translator.cpp

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -132,8 +132,16 @@ class CSVColumnAccess : public RowBatch::AttributeAccess {
132132
llvm::Value *data_len) const {
133133
auto *input_func = SqlType().GetInputFunction(codegen, ai_->type);
134134
auto *raw_val = codegen.CallFunc(input_func, {type, data_ptr, data_len});
135-
return codegen::Value{ai_->type, raw_val, nullptr,
136-
codegen.ConstBool(false)};
135+
if (SqlType().IsVariableLength()) {
136+
// StrWithLen
137+
llvm::Value *str_ptr = codegen->CreateExtractValue(raw_val, 0);
138+
llvm::Value *str_len = codegen->CreateExtractValue(raw_val, 1);
139+
return codegen::Value{ai_->type, str_ptr, str_len,
140+
codegen.ConstBool(false)};
141+
} else {
142+
return codegen::Value{ai_->type, raw_val, nullptr,
143+
codegen.ConstBool(false)};
144+
}
137145
}
138146

139147
Value Access(CodeGen &codegen, UNUSED_ATTRIBUTE RowBatch::Row &row) override {

src/codegen/proxy/string_functions_proxy.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414

1515
#include "codegen/proxy/executor_context_proxy.h"
1616
#include "codegen/proxy/pool_proxy.h"
17+
#include "codegen/proxy/runtime_functions_proxy.h"
1718

1819
namespace peloton {
1920
namespace codegen {
@@ -32,6 +33,7 @@ DEFINE_METHOD(peloton::function, StringFunctions, Substr);
3233
DEFINE_METHOD(peloton::function, StringFunctions, Repeat);
3334
DEFINE_METHOD(peloton::function, StringFunctions, CompareStrings);
3435
DEFINE_METHOD(peloton::function, StringFunctions, WriteString);
36+
DEFINE_METHOD(peloton::function, StringFunctions, InputString);
3537

3638
} // namespace codegen
3739
} // namespace peloton

src/codegen/type/varchar_type.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -597,9 +597,8 @@ void Varchar::GetTypeForMaterialization(CodeGen &codegen, llvm::Type *&val_type,
597597
}
598598

599599
llvm::Function *Varchar::GetInputFunction(
600-
UNUSED_ATTRIBUTE CodeGen &codegen,
601-
UNUSED_ATTRIBUTE const Type &type) const {
602-
throw NotImplementedException{"String input not implemented yet"};
600+
CodeGen &codegen, UNUSED_ATTRIBUTE const Type &type) const {
601+
return StringFunctionsProxy::InputString.GetFunction(codegen);
603602
}
604603

605604
llvm::Function *Varchar::GetOutputFunction(

src/codegen/util/csv_scanner.cpp

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,9 @@ const char *CSVScanner::NextLine() {
197197

198198
uint32_t line_end = buffer_begin_;
199199

200+
char quote = quote_;
201+
char escape = (quote_ == escape_ ? static_cast<char>('\0') : escape_);
202+
200203
while (true) {
201204
if (line_end >= buffer_end_) {
202205
// We need to read more data from the CSV file. But first, we need to copy
@@ -219,13 +222,13 @@ const char *CSVScanner::NextLine() {
219222
// Read character
220223
char c = buffer_[line_end];
221224

222-
if (in_quote && c == escape_) {
223-
last_was_escape = true;
225+
if (in_quote && c == escape) {
226+
last_was_escape = !last_was_escape;
224227
}
225-
if (c == quote_ && !last_was_escape) {
226-
in_quote = true;
228+
if (c == quote && !last_was_escape) {
229+
in_quote = !in_quote;
227230
}
228-
if (c != escape_) {
231+
if (c != escape) {
229232
last_was_escape = false;
230233
}
231234

src/function/string_functions.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -245,5 +245,13 @@ void StringFunctions::WriteString(const char *data, uint32_t len, char *buf,
245245
*reinterpret_cast<Varlen **>(buf) = area;
246246
}
247247

248+
// Wraps a raw input buffer as a StrWithLen. The returned struct carries the
// caller's `data` pointer unchanged (no bytes are copied) together with a
// length of `len + 1`. The `type` argument is accepted but not used.
// NOTE(review): the extra byte presumably accounts for a trailing NUL that
// downstream varchar consumers expect -- confirm (see the TODO below).
// TODO(pmenon): UTF8 checking, string checking, lots of error handling here
249+
// TODO(pmenon): Why do we need this +1 on the length ?
250+
StringFunctions::StrWithLen StringFunctions::InputString(
251+
UNUSED_ATTRIBUTE const codegen::type::Type &type, const char *data,
252+
uint32_t len) {
253+
return StringFunctions::StrWithLen{data, len + 1};
254+
}
255+
248256
} // namespace function
249257
} // namespace peloton

src/include/codegen/proxy/string_functions_proxy.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ PROXY(StringFunctions) {
3131
DECLARE_METHOD(Repeat);
3232
DECLARE_METHOD(CompareStrings);
3333
DECLARE_METHOD(WriteString);
34+
DECLARE_METHOD(InputString);
3435
};
3536

3637
PROXY(StrWithLen) {

src/include/function/string_functions.h

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,12 @@
1616

1717
namespace peloton {
1818

19+
// Forward declaration only: StringFunctions::InputString takes a
// codegen::type::Type by const reference in its declaration in this header.
namespace codegen {
20+
namespace type {
21+
class Type;
22+
} // namespace type
23+
} // namespace codegen
24+
1925
namespace executor {
2026
class ExecutorContext;
2127
} // namespace executor
@@ -104,6 +110,16 @@ class StringFunctions {
104110
*/
105111
static void WriteString(const char *data, uint32_t len, char *buf,
106112
peloton::type::AbstractPool &pool);
113+
114+
/**
115+
* Wrap a raw input buffer as a StrWithLen without copying the bytes.
116+
* @param type SQL type of the value being read; unused by the implementation
117+
* @param data Pointer to the start of the input string; returned as-is
118+
* @param len Length of the input as supplied by the caller
119+
* @return A StrWithLen holding `data` and a length of `len + 1`
120+
*/
121+
static StrWithLen InputString(const codegen::type::Type &type,
122+
const char *data, uint32_t len);
107123
};
108124

109125
} // namespace function

src/traffic_cop/traffic_cop.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -523,6 +523,11 @@ FieldInfo TrafficCop::GetColumnFieldForValueType(std::string column_name,
523523
field_size = 255;
524524
break;
525525
}
526+
case type::TypeId::DATE: {
527+
field_type = PostgresValueType::DATE;
528+
field_size = 4;
529+
break;
530+
}
526531
case type::TypeId::TIMESTAMP: {
527532
field_type = PostgresValueType::TIMESTAMPS;
528533
field_size = 64; // FIXME: Bytes???

0 commit comments

Comments
 (0)