
Commit a0c2fa6

zero323 authored and dongjoon-hyun committed
[SPARK-28439][PYTHON][SQL] Add support for count: Column in array_repeat
## What changes were proposed in this pull request?

This adds a simple check for the `count` argument:

- If it is a `Column`, we apply `_to_java_column` before invoking the JVM counterpart.
- Otherwise we proceed as before.

## How was this patch tested?

Manual testing.

Closes apache#25193 from zero323/SPARK-28278.

Authored-by: zero323 <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
1 parent 2cf0491 commit a0c2fa6

File tree: 2 files changed (+14, -1 lines)


python/pyspark/sql/functions.py

Lines changed: 4 additions & 1 deletion

```diff
@@ -2698,7 +2698,10 @@ def array_repeat(col, count):
     [Row(r=[u'ab', u'ab', u'ab'])]
     """
     sc = SparkContext._active_spark_context
-    return Column(sc._jvm.functions.array_repeat(_to_java_column(col), count))
+    return Column(sc._jvm.functions.array_repeat(
+        _to_java_column(col),
+        _to_java_column(count) if isinstance(count, Column) else count
+    ))


 @since(2.4)
```
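The core of the change is the `isinstance`-based dispatch on `count`: a `Column` gets unwrapped into its JVM handle, while a plain Python value passes through unchanged. A minimal, Spark-free sketch of that pattern follows; the `Column` stand-in and `to_java` helper here are hypothetical stand-ins for `pyspark.sql.Column` and `_to_java_column`, for illustration only.

```python
class Column:
    """Stand-in for pyspark.sql.Column, for illustration only."""
    def __init__(self, jc):
        self._jc = jc


def to_java(col):
    # Hypothetical stand-in for pyspark's _to_java_column:
    # unwrap a Column into its underlying JVM handle.
    return col._jc


def normalize_count(count):
    # The pattern added by this commit: convert Column arguments,
    # pass plain Python values (e.g. int) through unchanged.
    return to_java(count) if isinstance(count, Column) else count


print(normalize_count(3))             # → 3 (plain int passes through)
print(normalize_count(Column("jc")))  # → jc (Column is unwrapped)
```

This keeps the public signature of `array_repeat` backward compatible: existing callers passing an `int` are untouched, while `Column` counts now work as well.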

python/pyspark/sql/tests/test_functions.py

Lines changed: 10 additions & 0 deletions

```diff
@@ -294,6 +294,16 @@ def test_input_file_name_reset_for_rdd(self):
         for result in results:
             self.assertEqual(result[0], '')

+    def test_array_repeat(self):
+        from pyspark.sql.functions import array_repeat, lit
+
+        df = self.spark.range(1)
+
+        self.assertEquals(
+            df.select(array_repeat("id", 3)).toDF("val").collect(),
+            df.select(array_repeat("id", lit(3))).toDF("val").collect(),
+        )
+

 if __name__ == "__main__":
     import unittest
```
