2020#include " codegen/proxy/csv_scanner_proxy.h"
2121#include " codegen/proxy/runtime_functions_proxy.h"
2222#include " codegen/type/sql_type.h"
23+ #include " codegen/vector.h"
2324#include " planner/csv_scan_plan.h"
2425
2526namespace peloton {
@@ -28,23 +29,25 @@ namespace codegen {
2829CSVScanTranslator::CSVScanTranslator (const planner::CSVScanPlan &scan,
2930 CompilationContext &context,
3031 Pipeline &pipeline)
31- : OperatorTranslator(context, pipeline), scan_(scan ) {
32+ : OperatorTranslator(scan, context, pipeline ) {
3233 // Register the CSV scanner instance; the returned ID is kept in scanner_id_ and later passed to LoadStatePtr()
33- auto &runtime_state = context.GetRuntimeState ();
34- scanner_id_ = runtime_state .RegisterState (
34+ auto &query_state = context.GetQueryState ();
35+ scanner_id_ = query_state .RegisterState (
3536 " csvScanner" , CSVScannerProxy::GetType (GetCodeGen ()));
3637
3738 // Load information about the attributes output by the scan plan into output_attributes_
38- scan_ .GetAttributes (output_attributes_);
39+ scan .GetAttributes (output_attributes_);
3940}
4041
41- void CSVScanTranslator::InitializeState () {
42+ void CSVScanTranslator::InitializeQueryState () {
4243 auto &codegen = GetCodeGen ();
4344
45+ auto &scan = GetPlanAs<planner::CSVScanPlan>();
46+
4447 // Arguments
4548 llvm::Value *scanner_ptr = LoadStatePtr (scanner_id_);
46- llvm::Value *exec_ctx_ptr = GetCompilationContext (). GetExecutorContextPtr ();
47- llvm::Value *file_path = codegen.ConstString (scan_ .GetFileName (), " filePath" );
49+ llvm::Value *exec_ctx_ptr = GetExecutorContextPtr ();
50+ llvm::Value *file_path = codegen.ConstString (scan .GetFileName (), " filePath" );
4851
4952 auto num_cols = static_cast <uint32_t >(output_attributes_.size ());
5053
@@ -71,20 +74,24 @@ void CSVScanTranslator::InitializeState() {
7174 // Cast the runtime type to an opaque void*. This is because we're calling
7275 // into pre-compiled C++ that doesn't know what the dynamically generated
7376 // query state structure looks like.
74- llvm::Value *runtime_state_ptr = codegen->CreatePointerCast (
77+ llvm::Value *query_state_ptr = codegen->CreatePointerCast (
7578 codegen.GetState (), codegen.VoidType ()->getPointerTo ());
7679
7780 // Call CSVScanner::Init()
7881 codegen.Call (CSVScannerProxy::Init,
7982 {scanner_ptr, exec_ctx_ptr, file_path, output_col_types,
80- codegen.Const32 (num_cols), consumer_func, runtime_state_ptr ,
81- codegen.Const8 (scan_ .GetDelimiterChar ()),
82- codegen.Const8 (scan_ .GetQuoteChar ()),
83- codegen.Const8 (scan_ .GetEscapeChar ())});
83+ codegen.Const32 (num_cols), consumer_func, query_state_ptr ,
84+ codegen.Const8 (scan .GetDelimiterChar ()),
85+ codegen.Const8 (scan .GetQuoteChar ()),
86+ codegen.Const8 (scan .GetEscapeChar ())});
8487}
8588
8689namespace {
8790
91+ /* *
92+ * This is a deferred column access class configured to load the contents of a
93+ * given column.
94+ */
8895class CSVColumnAccess : public RowBatch ::AttributeAccess {
8996 public:
9097 CSVColumnAccess (const planner::AttributeInfo *ai, llvm::Value *csv_columns,
@@ -94,6 +101,12 @@ class CSVColumnAccess : public RowBatch::AttributeAccess {
94101 null_str_(std::move(null_str)),
95102 runtime_null_(runtime_null_str) {}
96103
104+ // ////////////////////////////////////////////////////////////////////////////
105+ // /
106+ // / Accessors
107+ // /
108+ // ////////////////////////////////////////////////////////////////////////////
109+
97110 llvm::Value *Columns () const { return csv_columns_; }
98111
99112 uint32_t ColumnIndex () const { return ai_->attribute_id ; }
@@ -102,6 +115,25 @@ class CSVColumnAccess : public RowBatch::AttributeAccess {
102115
103116 const type::SqlType &SqlType () const { return ai_->type .GetSqlType (); }
104117
118+ // ////////////////////////////////////////////////////////////////////////////
119+ // /
120+ // / Logic
121+ // /
122+ // ////////////////////////////////////////////////////////////////////////////
123+
124+ /* *
125+ * Check if a column's value is considered NULL. Given a pointer to the
126+ * column's string value, and the length of the string, this function will
127+ * check if the column's value is determined to be NULL. This is done by
128+ * comparing the column's contents with the NULL string configured in the
129+ * CSV scan plan (i.e., provided by the user).
130+ *
131+ * @param codegen The codegen instance
132+ * @param data_ptr A pointer to the column's string value
133+ * @param data_len The length of the column's string value
134+ * @return True if the column is equivalent to the NULL string. False
135+ * otherwise.
136+ */
105137 llvm::Value *IsNull (CodeGen &codegen, llvm::Value *data_ptr,
106138 llvm::Value *data_len) const {
107139 uint32_t null_str_len = static_cast <uint32_t >(null_str_.length ());
@@ -127,6 +159,16 @@ class CSVColumnAccess : public RowBatch::AttributeAccess {
127159 return check_null.BuildPHI (cmp_res, codegen.ConstBool (false ));
128160 }
129161
162+ /* *
163+ * Load the value of the given column with the given type, ignoring a null
164+ * check.
165+ *
166+ * @param codegen The codegen instance
167+ * @param type The SQL type of the column
168+ * @param data_ptr A pointer to the column's string representation
169+ * @param data_len The length of the column's string representation
170+ * @return The parsed value
171+ */
130172 Value LoadValueIgnoreNull (CodeGen &codegen, llvm::Value *type,
131173 llvm::Value *data_ptr,
132174 llvm::Value *data_len) const {
@@ -144,6 +186,15 @@ class CSVColumnAccess : public RowBatch::AttributeAccess {
144186 }
145187 }
146188
189+ /* *
190+ * Access this column in the given row. In reality, this function pulls out
191+ * the column information from the CSVScanner state and loads/parses the
192+ * column's value.
193+ *
194+ * @param codegen The codegen instance
195+ * @param row The row. This isn't used.
196+ * @return The value of the column
197+ */
147198 Value Access (CodeGen &codegen, UNUSED_ATTRIBUTE RowBatch::Row &row) override {
148199 // Load the type, data pointer and length values for the column
149200 auto *type = codegen->CreateConstInBoundsGEP2_32 (
@@ -178,22 +229,31 @@ class CSVColumnAccess : public RowBatch::AttributeAccess {
178229 }
179230
180231 private:
232+ // Information about the attribute
181233 const planner::AttributeInfo *ai_;
234+
235+ // A pointer to the array of columns
182236 llvm::Value *csv_columns_;
237+
238+ // The NULL string configured for the CSV scan
183239 const std::string null_str_;
240+
241+ // The runtime NULL string (a constant in LLVM)
184242 llvm::Value *runtime_null_;
185243};
186244
187245} // namespace
188246
247+ // We define the callback/consumer function for CSV parsing here
189248void CSVScanTranslator::DefineAuxiliaryFunctions () {
190249 CodeGen &codegen = GetCodeGen ();
191250 CompilationContext &cc = GetCompilationContext ();
192251
252+ auto &scan = GetPlanAs<planner::CSVScanPlan>();
253+
193254 // Define consumer function here
194255 std::vector<FunctionDeclaration::ArgumentInfo> arg_types = {
195- {" runtimeState" ,
196- cc.GetRuntimeState ().FinalizeType (codegen)->getPointerTo ()}};
256+ {" queryState" , cc.GetQueryState ().GetType ()->getPointerTo ()}};
197257 FunctionDeclaration decl{codegen.GetCodeContext (), " consumer" ,
198258 FunctionDeclaration::Visibility::Internal,
199259 codegen.VoidType (), arg_types};
@@ -209,13 +269,13 @@ void CSVScanTranslator::DefineAuxiliaryFunctions() {
209269 llvm::Value *cols = codegen->CreateLoad (codegen->CreateConstInBoundsGEP2_32 (
210270 CSVScannerProxy::GetType (codegen), LoadStatePtr (scanner_id_), 0 , 1 ));
211271
212- llvm::Value *null_str = codegen.ConstString (scan_ .GetNullString (), " null" );
272+ llvm::Value *null_str = codegen.ConstString (scan .GetNullString (), " null" );
213273
214274 // Add accessors for all columns into the row batch
215275 std::vector<CSVColumnAccess> column_accessors;
216276 for (uint32_t i = 0 ; i < output_attributes_.size (); i++) {
217277 column_accessors.emplace_back (output_attributes_[i], cols,
218- scan_ .GetNullString (), null_str);
278+ scan .GetNullString (), null_str);
219279 }
220280 for (uint32_t i = 0 ; i < output_attributes_.size (); i++) {
221281 one.AddAttribute (output_attributes_[i], &column_accessors[i]);
@@ -238,17 +298,10 @@ void CSVScanTranslator::Produce() const {
238298 GetCodeGen ().Call (CSVScannerProxy::Produce, {scanner_ptr});
239299}
240300
241- void CSVScanTranslator::TearDownState () {
301+ void CSVScanTranslator::TearDownQueryState () {
242302 auto *scanner_ptr = LoadStatePtr (scanner_id_);  // pointer to the scanner state slot registered in the constructor
243303 GetCodeGen ().Call (CSVScannerProxy::Destroy, {scanner_ptr});  // tear the scanner down: Destroy is called with that pointer as its only argument
244304}
245305
246- std::string CSVScanTranslator::GetName () const {
247- return StringUtil::Format (
248- " CSVScan(file: '%s', delimiter: '%c', quote: '%c', escape: '%c')" ,
249- scan_.GetFileName ().c_str (), scan_.GetDelimiterChar (),
250- scan_.GetQuoteChar (), scan_.GetEscapeChar ());
251- }
252-
253306} // namespace codegen
254- } // namespace peloton
307+ } // namespace peloton
0 commit comments