@@ -16,9 +16,11 @@
 #
 
 import sys
+if sys.version >= '3':
+    long = int
 
 from pyspark import since, SparkContext
-from pyspark.sql.column import _to_seq, _to_java_column
+from pyspark.sql.column import Column, _to_seq, _to_java_column
 
 __all__ = ["Window", "WindowSpec"]
 
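
Side note on the two-line shim added above: Python 3 has no separate ``long`` type, so the patch aliases it to ``int`` to keep the ``isinstance(start, (int, long))`` checks further down working on both interpreters. A minimal standalone sketch of the same idea (purely illustrative, not part of the patch):

import sys

# Python 3 removed `long`; alias it so isinstance checks written for
# Python 2 keep working unchanged on Python 3.
if sys.version >= '3':
    long = int

print(isinstance(2 ** 70, (int, long)))  # True on both Python 2 and Python 3
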
@@ -124,20 +126,45 @@ def rangeBetween(start, end):
         and "5" means the five off after the current row.
 
         We recommend users use ``Window.unboundedPreceding``, ``Window.unboundedFollowing``,
-        and ``Window.currentRow`` to specify special boundary values, rather than using integral
-        values directly.
+        ``Window.currentRow``, ``pyspark.sql.functions.unboundedPreceding``,
+        ``pyspark.sql.functions.unboundedFollowing`` and ``pyspark.sql.functions.currentRow``
+        to specify special boundary values, rather than using integral values directly.
 
         :param start: boundary start, inclusive.
-                      The frame is unbounded if this is ``Window.unboundedPreceding``, or
+                      The frame is unbounded if this is ``Window.unboundedPreceding``,
+                      a column returned by ``pyspark.sql.functions.unboundedPreceding``, or
                       any value less than or equal to max(-sys.maxsize, -9223372036854775808).
         :param end: boundary end, inclusive.
-                    The frame is unbounded if this is ``Window.unboundedFollowing``, or
+                    The frame is unbounded if this is ``Window.unboundedFollowing``,
+                    a column returned by ``pyspark.sql.functions.unboundedFollowing``, or
                     any value greater than or equal to min(sys.maxsize, 9223372036854775807).
+
+        >>> from pyspark.sql import functions as F, SparkSession, Window
+        >>> spark = SparkSession.builder.getOrCreate()
+        >>> df = spark.createDataFrame(
+        ...     [(1, "a"), (1, "a"), (2, "a"), (1, "b"), (2, "b"), (3, "b")], ["id", "category"])
+        >>> window = Window.orderBy("id").partitionBy("category").rangeBetween(
+        ...     F.currentRow(), F.lit(1))
+        >>> df.withColumn("sum", F.sum("id").over(window)).show()
+        +---+--------+---+
+        | id|category|sum|
+        +---+--------+---+
+        |  1|       b|  3|
+        |  2|       b|  5|
+        |  3|       b|  3|
+        |  1|       a|  4|
+        |  1|       a|  4|
+        |  2|       a|  2|
+        +---+--------+---+
         """
-        if start <= Window._PRECEDING_THRESHOLD:
-            start = Window.unboundedPreceding
-        if end >= Window._FOLLOWING_THRESHOLD:
-            end = Window.unboundedFollowing
+        if isinstance(start, (int, long)) and isinstance(end, (int, long)):
+            if start <= Window._PRECEDING_THRESHOLD:
+                start = Window.unboundedPreceding
+            if end >= Window._FOLLOWING_THRESHOLD:
+                end = Window.unboundedFollowing
+        elif isinstance(start, Column) and isinstance(end, Column):
+            start = start._jc
+            end = end._jc
         sc = SparkContext._active_spark_context
         jspec = sc._jvm.org.apache.spark.sql.expressions.Window.rangeBetween(start, end)
         return WindowSpec(jspec)
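
A hedged usage sketch of the two boundary styles this hunk makes ``Window.rangeBetween`` accept. It assumes a Spark build that contains this change, since ``pyspark.sql.functions.currentRow()`` and the other boundary helpers are introduced alongside it and are not available in older releases:

from pyspark.sql import SparkSession, Window, functions as F

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame(
    [(1, "a"), (1, "a"), (2, "a"), (1, "b"), (2, "b"), (3, "b")], ["id", "category"])

# Integer sentinels: taken by the (int, long) branch and clamped to the
# unbounded constants when they fall beyond the thresholds.
w_int = Window.partitionBy("category").orderBy("id").rangeBetween(
    Window.unboundedPreceding, Window.currentRow)

# Column boundaries: taken by the Column branch, which unwraps each Column
# to its underlying Java column (_jc) before calling the JVM API.
w_col = Window.partitionBy("category").orderBy("id").rangeBetween(
    F.currentRow(), F.lit(1))

df.withColumn("running", F.sum("id").over(w_int)) \
  .withColumn("next_range", F.sum("id").over(w_col)) \
  .show()
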
@@ -212,27 +239,34 @@ def rangeBetween(self, start, end):
         and "5" means the five off after the current row.
 
         We recommend users use ``Window.unboundedPreceding``, ``Window.unboundedFollowing``,
-        and ``Window.currentRow`` to specify special boundary values, rather than using integral
-        values directly.
+        ``Window.currentRow``, ``pyspark.sql.functions.unboundedPreceding``,
+        ``pyspark.sql.functions.unboundedFollowing`` and ``pyspark.sql.functions.currentRow``
+        to specify special boundary values, rather than using integral values directly.
 
         :param start: boundary start, inclusive.
-                      The frame is unbounded if this is ``Window.unboundedPreceding``, or
+                      The frame is unbounded if this is ``Window.unboundedPreceding``,
+                      a column returned by ``pyspark.sql.functions.unboundedPreceding``, or
                       any value less than or equal to max(-sys.maxsize, -9223372036854775808).
         :param end: boundary end, inclusive.
-                    The frame is unbounded if this is ``Window.unboundedFollowing``, or
+                    The frame is unbounded if this is ``Window.unboundedFollowing``,
+                    a column returned by ``pyspark.sql.functions.unboundedFollowing``, or
                     any value greater than or equal to min(sys.maxsize, 9223372036854775807).
         """
-        if start <= Window._PRECEDING_THRESHOLD:
-            start = Window.unboundedPreceding
-        if end >= Window._FOLLOWING_THRESHOLD:
-            end = Window.unboundedFollowing
+        if isinstance(start, (int, long)) and isinstance(end, (int, long)):
+            if start <= Window._PRECEDING_THRESHOLD:
+                start = Window.unboundedPreceding
+            if end >= Window._FOLLOWING_THRESHOLD:
+                end = Window.unboundedFollowing
+        elif isinstance(start, Column) and isinstance(end, Column):
+            start = start._jc
+            end = end._jc
         return WindowSpec(self._jspec.rangeBetween(start, end))
 
 
 def _test():
     import doctest
     SparkContext('local[4]', 'PythonTest')
-    (failure_count, test_count) = doctest.testmod()
+    (failure_count, test_count) = doctest.testmod(optionflags=doctest.NORMALIZE_WHITESPACE)
     if failure_count:
         sys.exit(-1)
 
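
Since both ``rangeBetween`` hunks share the same normalization step, here is a standalone sketch of what happens to ``start`` and ``end`` before they reach the JVM. The sentinel and threshold values below are assumptions reconstructed from the docstring (max(-sys.maxsize, -9223372036854775808) and min(sys.maxsize, 9223372036854775807)); the real ones live on the ``Window`` class:

import sys

# Assumed values, mirroring the docstring above (hypothetical names).
UNBOUNDED_PRECEDING = -9223372036854775808   # Window.unboundedPreceding
UNBOUNDED_FOLLOWING = 9223372036854775807    # Window.unboundedFollowing
PRECEDING_THRESHOLD = max(-sys.maxsize, UNBOUNDED_PRECEDING)
FOLLOWING_THRESHOLD = min(sys.maxsize, UNBOUNDED_FOLLOWING)


def normalize_bounds(start, end):
    """Clamp out-of-range integer bounds to the unbounded sentinels,
    the same way both rangeBetween implementations do in this patch."""
    if isinstance(start, int) and isinstance(end, int):
        if start <= PRECEDING_THRESHOLD:
            start = UNBOUNDED_PRECEDING
        if end >= FOLLOWING_THRESHOLD:
            end = UNBOUNDED_FOLLOWING
    # Column-typed bounds are instead unwrapped to their Java columns
    # (start._jc / end._jc) before being handed to the JVM.
    return start, end


print(normalize_bounds(-sys.maxsize, 3))  # start clamps to the unbounded sentinel
print(normalize_bounds(-2, 1))            # ordinary bounds pass through untouched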