Commit 8a48906

Author: Robert Kruszewski

Revert "[SPARK-25842][SQL] Deprecate rangeBetween APIs introduced in SPARK-21608"

This reverts commit 89d748b.

1 parent: 82c5f88

File tree: 2 files changed, +82 -18 lines


python/pyspark/sql/functions.py (30 additions, 0 deletions)

@@ -858,6 +858,36 @@ def ntile(n):
     return Column(sc._jvm.functions.ntile(int(n)))
 
 
+@since(2.4)
+def unboundedPreceding():
+    """
+    Window function: returns the special frame boundary that represents the first row
+    in the window partition.
+    """
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.functions.unboundedPreceding())
+
+
+@since(2.4)
+def unboundedFollowing():
+    """
+    Window function: returns the special frame boundary that represents the last row
+    in the window partition.
+    """
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.functions.unboundedFollowing())
+
+
+@since(2.4)
+def currentRow():
+    """
+    Window function: returns the special frame boundary that represents the current row
+    in the window partition.
+    """
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.functions.currentRow())
+
+
 # ---------------------- Date/Timestamp functions ------------------------------
 
 @since(1.5)
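The three helpers above are thin wrappers that hand a JVM boundary expression back as a
Column. A quick usage sketch, not part of the commit: it assumes a build that includes
this revert (the helpers do not exist in releases where SPARK-25842's removal stands)
and a local SparkSession.

    from pyspark.sql import SparkSession, Window
    from pyspark.sql import functions as F

    spark = SparkSession.builder.master("local[2]").getOrCreate()
    df = spark.createDataFrame([(1, "a"), (2, "a"), (3, "b")], ["id", "category"])

    # Running sum per category: from the first row of the partition up to and
    # including the current row, spelled with the restored Column boundaries.
    w = (Window.partitionBy("category")
               .orderBy("id")
               .rangeBetween(F.unboundedPreceding(), F.currentRow()))
    df.withColumn("running_sum", F.sum("id").over(w)).show()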

python/pyspark/sql/window.py (52 additions, 18 deletions)

@@ -16,9 +16,11 @@
 #
 
 import sys
+if sys.version >= '3':
+    long = int
 
 from pyspark import since, SparkContext
-from pyspark.sql.column import _to_seq, _to_java_column
+from pyspark.sql.column import Column, _to_seq, _to_java_column
 
 __all__ = ["Window", "WindowSpec"]
 
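A note on the two import changes above: ``long`` does not exist on Python 3, so the
shim aliases it to ``int``, which keeps the ``isinstance(start, (int, long))`` checks
added further down working on both Python 2 and 3; ``Column`` is imported because the
boundary arguments may now be Columns. A Spark-free illustration of the shim:

    import sys
    if sys.version >= '3':
        long = int

    # With the alias in place, one check covers Python 2 int/long and
    # Python 3 int, including values beyond 2**63.
    print(isinstance(9223372036854775807, (int, long)))   # True
    print(isinstance(-9223372036854775809, (int, long)))  # True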

@@ -124,20 +126,45 @@ def rangeBetween(start, end):
         and "5" means the five off after the current row.
 
         We recommend users use ``Window.unboundedPreceding``, ``Window.unboundedFollowing``,
-        and ``Window.currentRow`` to specify special boundary values, rather than using integral
-        values directly.
+        ``Window.currentRow``, ``pyspark.sql.functions.unboundedPreceding``,
+        ``pyspark.sql.functions.unboundedFollowing`` and ``pyspark.sql.functions.currentRow``
+        to specify special boundary values, rather than using integral values directly.
 
         :param start: boundary start, inclusive.
-                      The frame is unbounded if this is ``Window.unboundedPreceding``, or
+                      The frame is unbounded if this is ``Window.unboundedPreceding``,
+                      a column returned by ``pyspark.sql.functions.unboundedPreceding``, or
                       any value less than or equal to max(-sys.maxsize, -9223372036854775808).
         :param end: boundary end, inclusive.
-                    The frame is unbounded if this is ``Window.unboundedFollowing``, or
+                    The frame is unbounded if this is ``Window.unboundedFollowing``,
+                    a column returned by ``pyspark.sql.functions.unboundedFollowing``, or
                     any value greater than or equal to min(sys.maxsize, 9223372036854775807).
+
+        >>> from pyspark.sql import functions as F, SparkSession, Window
+        >>> spark = SparkSession.builder.getOrCreate()
+        >>> df = spark.createDataFrame(
+        ...     [(1, "a"), (1, "a"), (2, "a"), (1, "b"), (2, "b"), (3, "b")], ["id", "category"])
+        >>> window = Window.orderBy("id").partitionBy("category").rangeBetween(
+        ...     F.currentRow(), F.lit(1))
+        >>> df.withColumn("sum", F.sum("id").over(window)).show()
+        +---+--------+---+
+        | id|category|sum|
+        +---+--------+---+
+        |  1|       b|  3|
+        |  2|       b|  5|
+        |  3|       b|  3|
+        |  1|       a|  4|
+        |  1|       a|  4|
+        |  2|       a|  2|
+        +---+--------+---+
         """
-        if start <= Window._PRECEDING_THRESHOLD:
-            start = Window.unboundedPreceding
-        if end >= Window._FOLLOWING_THRESHOLD:
-            end = Window.unboundedFollowing
+        if isinstance(start, (int, long)) and isinstance(end, (int, long)):
+            if start <= Window._PRECEDING_THRESHOLD:
+                start = Window.unboundedPreceding
+            if end >= Window._FOLLOWING_THRESHOLD:
+                end = Window.unboundedFollowing
+        elif isinstance(start, Column) and isinstance(end, Column):
+            start = start._jc
+            end = end._jc
         sc = SparkContext._active_spark_context
         jspec = sc._jvm.org.apache.spark.sql.expressions.Window.rangeBetween(start, end)
         return WindowSpec(jspec)
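The new dispatch is worth spelling out: integer boundaries are normalized against the
thresholds as before, Column boundaries are unwrapped to their JVM expressions, and a
mixed int/Column pair matches neither branch and reaches the JVM call unconverted. A
sketch of the two equivalent spellings (assumes this revert is applied and a
SparkContext is running; the same dispatch appears in ``WindowSpec.rangeBetween``
below, which is what the chained form exercises):

    from pyspark.sql import Window
    from pyspark.sql import functions as F

    # Same frame, two spellings: the module-level integer constants ...
    spec_from_ints = Window.orderBy("id").rangeBetween(
        Window.unboundedPreceding, Window.currentRow)

    # ... or the restored Column-returning helpers.
    spec_from_cols = Window.orderBy("id").rangeBetween(
        F.unboundedPreceding(), F.currentRow())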
@@ -212,27 +239,34 @@ def rangeBetween(self, start, end):
         and "5" means the five off after the current row.
 
         We recommend users use ``Window.unboundedPreceding``, ``Window.unboundedFollowing``,
-        and ``Window.currentRow`` to specify special boundary values, rather than using integral
-        values directly.
+        ``Window.currentRow``, ``pyspark.sql.functions.unboundedPreceding``,
+        ``pyspark.sql.functions.unboundedFollowing`` and ``pyspark.sql.functions.currentRow``
+        to specify special boundary values, rather than using integral values directly.
 
         :param start: boundary start, inclusive.
-                      The frame is unbounded if this is ``Window.unboundedPreceding``, or
+                      The frame is unbounded if this is ``Window.unboundedPreceding``,
+                      a column returned by ``pyspark.sql.functions.unboundedPreceding``, or
                       any value less than or equal to max(-sys.maxsize, -9223372036854775808).
         :param end: boundary end, inclusive.
-                    The frame is unbounded if this is ``Window.unboundedFollowing``, or
+                    The frame is unbounded if this is ``Window.unboundedFollowing``,
+                    a column returned by ``pyspark.sql.functions.unboundedFollowing``, or
                     any value greater than or equal to min(sys.maxsize, 9223372036854775807).
         """
-        if start <= Window._PRECEDING_THRESHOLD:
-            start = Window.unboundedPreceding
-        if end >= Window._FOLLOWING_THRESHOLD:
-            end = Window.unboundedFollowing
+        if isinstance(start, (int, long)) and isinstance(end, (int, long)):
+            if start <= Window._PRECEDING_THRESHOLD:
+                start = Window.unboundedPreceding
+            if end >= Window._FOLLOWING_THRESHOLD:
+                end = Window.unboundedFollowing
+        elif isinstance(start, Column) and isinstance(end, Column):
+            start = start._jc
+            end = end._jc
         return WindowSpec(self._jspec.rangeBetween(start, end))
 
 
 def _test():
     import doctest
     SparkContext('local[4]', 'PythonTest')
-    (failure_count, test_count) = doctest.testmod()
+    (failure_count, test_count) = doctest.testmod(optionflags=doctest.NORMALIZE_WHITESPACE)
     if failure_count:
         sys.exit(-1)
 
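Two side notes on this hunk: ``WindowSpec.rangeBetween`` gains exactly the same
int-or-Column dispatch as the static ``Window.rangeBetween`` above, and the doctest
runner now passes ``NORMALIZE_WHITESPACE`` so the ASCII table that ``show()`` prints in
the new docstring example compares equal regardless of column padding. A Spark-free
illustration of what the flag buys (hypothetical snippet, not from the commit):

    import doctest

    def aligned():
        """
        >>> print("| 1|  b|")
        | 1|     b|
        """

    # Expected and actual output differ only in runs of spaces; the flag makes
    # doctest treat any whitespace run as a single space, so this passes.
    results = doctest.testmod(optionflags=doctest.NORMALIZE_WHITESPACE)
    print(results.failed)  # 0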
