@@ -24,6 +24,7 @@ import org.apache.spark.sql.catalyst.analysis.{TypeCheckResult, TypeCoercion}
24
24
import org .apache .spark .sql .catalyst .expressions .aggregate .DeclarativeAggregate
25
25
import org .apache .spark .sql .catalyst .expressions .codegen ._
26
26
import org .apache .spark .sql .catalyst .expressions .codegen .Block ._
27
+ import org .apache .spark .sql .catalyst .plans .logical .LogicalPlan
27
28
import org .apache .spark .sql .catalyst .trees .TreeNode
28
29
import org .apache .spark .sql .catalyst .util .truncatedString
29
30
import org .apache .spark .sql .internal .SQLConf
@@ -40,12 +41,28 @@ import org.apache.spark.sql.types._
40
41
* "name(arguments...)", the concrete implementation must be a case class whose constructor
41
42
* arguments are all Expressions types. See [[Substring ]] for an example.
42
43
*
43
- * There are a few important traits:
44
+ * There are a few important traits or abstract classes :
44
45
*
45
46
* - [[Nondeterministic ]]: an expression that is not deterministic.
47
+ * - [[Stateful ]]: an expression that contains mutable state. For example, MonotonicallyIncreasingID
48
+ * and Rand. A stateful expression is always non-deterministic.
46
49
* - [[Unevaluable ]]: an expression that is not supposed to be evaluated.
47
50
* - [[CodegenFallback ]]: an expression that does not have code gen implemented and falls back to
48
51
* interpreted mode.
52
+ * - [[NullIntolerant ]]: an expression that is null intolerant (i.e. any null input will result in
53
+ * null output).
54
+ * - [[NonSQLExpression ]]: a common base trait for the expressions that do not have a SQL
55
+ * expression-like representation. For example, `ScalaUDF`, `ScalaUDAF`,
56
+ * and object `MapObjects` and `Invoke`.
57
+ * - [[UserDefinedExpression ]]: a common base trait for user-defined functions, including
58
+ * UDF/UDAF/UDTF.
59
+ * - [[HigherOrderFunction ]]: a common base trait for higher order functions that take one or more
60
+ * (lambda) functions and apply these to some objects. The function
61
+ * produces a number of variables which can be consumed by some lambda
62
+ * functions.
63
+ * - [[NamedExpression ]]: An [[Expression ]] that is named.
64
+ * - [[TimeZoneAwareExpression ]]: A common base trait for time zone aware expressions.
65
+ * - [[SubqueryExpression ]]: A base interface for expressions that contain a [[LogicalPlan ]].
49
66
*
50
67
* - [[LeafExpression ]]: an expression that has no child.
51
68
* - [[UnaryExpression ]]: an expression that has one child.
@@ -54,12 +71,20 @@ import org.apache.spark.sql.types._
54
71
* - [[BinaryOperator ]]: a special case of [[BinaryExpression ]] that requires two children to have
55
72
* the same output data type.
56
73
*
74
+ * A few important traits used for type coercion rules:
75
+ * - [[ExpectsInputTypes ]]: an expression that has the expected input types. This trait is typically
76
+ * used by operator expressions (e.g. [[Add ]], [[Subtract ]]) to define
77
+ * expected input types without any implicit casting.
78
+ * - [[ImplicitCastInputTypes ]]: an expression that has the expected input types, which can be
79
+ * implicitly cast using [[TypeCoercion.ImplicitTypeCasts ]].
80
+ * - [[ComplexTypeMergingExpression ]]: to resolve output types of the complex expressions
81
+ * (e.g., [[CaseWhen ]]).
57
82
*/
58
83
abstract class Expression extends TreeNode [Expression ] {
59
84
60
85
/**
61
86
* Returns true when an expression is a candidate for static evaluation before the query is
62
- * executed.
87
+ * executed. A typical use case: [[ org.apache.spark.sql.catalyst.optimizer.ConstantFolding ]].
63
88
*
64
89
* The following conditions are used to determine suitability for constant folding:
65
90
* - A [[Coalesce ]] is foldable if all of its children are foldable
@@ -72,7 +97,8 @@ abstract class Expression extends TreeNode[Expression] {
72
97
73
98
/**
74
99
* Returns true when the current expression always returns the same result for fixed inputs from
75
- * children.
100
+ * children. The non-deterministic expressions should not change in number and order. They should
101
+ * not be evaluated during the query planning.
76
102
*
77
103
* Note that this means that an expression should be considered as non-deterministic if:
78
104
* - it relies on some mutable internal state, or
@@ -252,8 +278,9 @@ abstract class Expression extends TreeNode[Expression] {
252
278
253
279
254
280
/**
255
- * An expression that cannot be evaluated. Some expressions don't live past analysis or optimization
256
- * time (e.g. Star). This trait is used by those expressions.
281
+ * An expression that cannot be evaluated. These expressions don't live past analysis or
282
+ * optimization time (e.g. Star) and should not be evaluated during query planning and
283
+ * execution.
257
284
*/
258
285
trait Unevaluable extends Expression {
259
286
@@ -724,9 +751,10 @@ abstract class TernaryExpression extends Expression {
724
751
}
725
752
726
753
/**
727
- * A trait resolving nullable, containsNull, valueContainsNull flags of the output date type.
728
- * This logic is usually utilized by expressions combining data from multiple child expressions
729
- * of non-primitive types (e.g. [[CaseWhen ]]).
754
+ * A trait used for resolving nullable flags, including `nullable`, `containsNull` of [[ArrayType ]]
755
+ * and `valueContainsNull` of [[MapType ]], of the output data
756
+ * type. This is usually utilized by the expressions (e.g. [[CaseWhen ]]) that combine data from
757
+ * multiple child expressions of non-primitive types.
730
758
*/
731
759
trait ComplexTypeMergingExpression extends Expression {
732
760
0 commit comments