|
| 1 | +// Copyright 2019 The SQLFlow Authors. All rights reserved. |
| 2 | +// Licensed under the Apache License, Version 2.0 (the "License"); |
| 3 | +// you may not use this file except in compliance with the License. |
| 4 | +// You may obtain a copy of the License at |
| 5 | +// |
| 6 | +// http://www.apache.org/licenses/LICENSE-2.0 |
| 7 | +// |
| 8 | +// Unless required by applicable law or agreed to in writing, software |
| 9 | +// distributed under the License is distributed on an "AS IS" BASIS, |
| 10 | +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 11 | +// See the License for the specific language governing permissions and |
| 12 | +// limitations under the License. |
| 13 | + |
| 14 | +package codegen |
| 15 | + |
// FieldType indicates the field type of a table column.
type FieldType int

const (
	// Int indicates the corresponding table column is an integer.
	Int FieldType = iota
	// Float indicates the corresponding table column is a float.
	Float
	// String indicates the corresponding table column is a string.
	String
)
| 27 | + |
// FieldMeta contains the meta information for decoding. A field is a selected column of a SQL result.
//
// Name indicates the name for a field.
//
// DType indicates the data type for a field. For example: Int, Float, String.
//
// Delimiter indicates the decoding method of a field. For example, the field may
// contain a string like "1,23,42" which represents a 3-D tensor [1, 23, 42].
//
// Shape indicates the shape of the tensor represented for a field. For example, the
// field may contain a string like "1,23,42" which represents a 3-D tensor; the shape
// will be [3].
//
// IsSparse indicates the type of tensor for a field. True means the tensor is a sparse tensor.
type FieldMeta struct {
	Name      string    `json:"name"`      // e.g. "sepal_length"
	DType     FieldType `json:"dtype"`     // e.g. Int, Float, String
	Delimiter string    `json:"delimiter"` // e.g. ","
	Shape     []int     `json:"shape"`     // e.g. [1], [1 2 3]
	IsSparse  bool      `json:"is_sparse"` // e.g. false
}
| 49 | + |
// FeatureColumn indicates the feature column to be applied on the field. Please refer to
// github.com/sql-machine-learning/sqlflow/sql/codegen/feature_column.go for a detailed list of all feature columns.
type FeatureColumn interface{}
| 53 | + |
// Attribute represents a parsed entry in the WITH clause.
type Attribute struct {
	// Key is the attribute name, e.g. "train.epoch".
	Key string
	// Value is the parsed attribute value; its dynamic type depends on the
	// clause, e.g. an int for "train.epoch = 1000" or a slice for
	// "model.hidden_units = [10, 10]".
	Value interface{}
}
| 59 | + |
// TrainIR is the intermediate representation for code generation of a training job.
//
// Please be aware that the TrainIR intentionally excludes the model table name in the
// INTO clause. The sql package will save the output files of a generated Python program.
// For prediction and analysis jobs, the sql package will restore an identical working directory.
type TrainIR struct {
	// DataSource contains the connection information. For example, "hive://root:root@localhost:10000/churn"
	DataSource string
	// Select specifies the query for fetching the training data. For example, "select * from iris.train;".
	Select string
	// ValidationSelect specifies the query for fetching the validation data. For example, "select * from iris.val;".
	ValidationSelect string
	// Estimator specifies the estimator type. For example, after parsing "select ... train DNNClassifier WITH ...",
	// the Estimator will be "DNNClassifier".
	Estimator string
	// Attributes contain a list of parsed attribute in the WITH clause. For example, after parsing
	// "select ... train ... with train.epoch = 1000, model.hidden_units = [10, 10]",
	// the Attributes will be {{"train.epoch", 1000}, {"model.hidden_units", [10 10]}}.
	Attributes []Attribute
	// Features contain a map of a list of feature columns in the COLUMN clause.
	// For multiple COLUMN clauses like
	// ```
	// column ... for deep_feature
	// column ... for wide_feature
	// ```
	// they will be parsed as {"deep_feature": {...}, "wide_feature": {...}}.
	// For a single column clause like "column ...", "feature_columns" will be used as the default map key.
	Features map[string][]FeatureColumn
	// Label specifies the feature column in the LABEL clause.
	Label FeatureColumn
}
| 91 | + |
// PredictIR is the intermediate representation for code generation of a prediction job.
//
// Please be aware the PredictIR contains the result table name, so the
// generated Python program is responsible to create and write the result table.
type PredictIR struct {
	// DataSource contains the connection information. For example, "hive://root:root@localhost:10000/churn"
	DataSource string
	// Select specifies the query for fetching the prediction data. For example, "select * from iris.test;".
	Select string
	// ResultTable specifies the table to store the prediction result.
	ResultTable string
	// Attributes contain a list of parsed attribute in the WITH clause. For example, after parsing
	// "select ... predict ... with predict.batch_size = 32 into ...",
	// the Attributes will be {{"predict.batch_size", 32}}.
	Attributes []Attribute
	// TrainIR is the TrainIR used for generating the training job of the corresponding model.
	TrainIR TrainIR
}
| 110 | + |
// AnalyzeIR is the intermediate representation for code generation of an analysis job.
type AnalyzeIR struct {
	// DataSource contains the connection information. For example, "hive://root:root@localhost:10000/churn"
	DataSource string
	// Select specifies the query for fetching the analysis data. For example, "select * from iris.test;".
	Select string
	// Attributes contain a list of parsed attribute in the WITH clause. For example, after parsing
	// "select ... analyze ... with analyze.plot_type = "bar"",
	// the Attributes will be {{"analyze.plot_type", "bar"}}.
	Attributes []Attribute
	// TrainIR is the TrainIR used for generating the training job of the corresponding model.
	TrainIR TrainIR
}
0 commit comments