@@ -687,7 +687,7 @@ setMethod("storageLevel",
687
687
# ' @rdname coalesce
688
688
# ' @name coalesce
689
689
# ' @aliases coalesce,SparkDataFrame-method
690
- # ' @seealso \link{repartition}
690
+ # ' @seealso \link{repartition}, \link{repartitionByRange}
691
691
# ' @examples
692
692
# '\dontrun{
693
693
# ' sparkR.session()
@@ -723,7 +723,7 @@ setMethod("coalesce",
723
723
# ' @rdname repartition
724
724
# ' @name repartition
725
725
# ' @aliases repartition,SparkDataFrame-method
726
- # ' @seealso \link{coalesce}
726
+ # ' @seealso \link{coalesce}, \link{repartitionByRange}
727
727
# ' @examples
728
728
# '\dontrun{
729
729
# ' sparkR.session()
@@ -759,6 +759,67 @@ setMethod("repartition",
759
759
dataFrame(sdf )
760
760
})
761
761
762
#' Repartition by range
#'
#' The following options for repartition by range are possible:
#' \itemize{
#'  \item{1.} {Return a new SparkDataFrame range partitioned by
#'                      the given columns into \code{numPartitions}.}
#'  \item{2.} {Return a new SparkDataFrame range partitioned by the given column(s),
#'                      using \code{spark.sql.shuffle.partitions} as number of partitions.}
#'}
#'
#' @param x a SparkDataFrame.
#' @param numPartitions the number of partitions to use.
#' @param col the column by which the range partitioning will be performed.
#' @param ... additional column(s) to be used in the range partitioning.
#'
#' @family SparkDataFrame functions
#' @rdname repartitionByRange
#' @name repartitionByRange
#' @aliases repartitionByRange,SparkDataFrame-method
#' @seealso \link{repartition}, \link{coalesce}
#' @examples
#'\dontrun{
#' sparkR.session()
#' path <- "path/to/file.json"
#' df <- read.json(path)
#' newDF <- repartitionByRange(df, col = df$col1, df$col2)
#' newDF <- repartitionByRange(df, 3L, col = df$col1, df$col2)
#'}
#' @note repartitionByRange since 2.4.0
setMethod("repartitionByRange",
          signature(x = "SparkDataFrame"),
          function(x, numPartitions = NULL, col = NULL, ...) {
            if (!is.null(numPartitions) && !is.null(col)) {
              # Both the partition count and partition-by columns were given.
              # inherits() is used instead of `class(col) == "Column"`: class()
              # can return a multi-element vector, which makes `==` unreliable
              # inside a scalar `if` condition.
              if (is.numeric(numPartitions) && inherits(col, "Column")) {
                cols <- list(col, ...)
                # Extract the Java-side column reference from each Column object.
                jcol <- lapply(cols, function(c) { c@jc })
                sdf <- callJMethod(x@sdf, "repartitionByRange", numToInt(numPartitions), jcol)
              } else {
                stop(paste("numPartitions and col must be numeric and Column; however, got",
                           class(numPartitions), "and", class(col)))
              }
            } else if (!is.null(col)) {
              # Only partition-by columns were given; Spark falls back to
              # spark.sql.shuffle.partitions for the partition count.
              if (inherits(col, "Column")) {
                cols <- list(col, ...)
                jcol <- lapply(cols, function(c) { c@jc })
                sdf <- callJMethod(x@sdf, "repartitionByRange", jcol)
              } else {
                stop(paste("col must be Column; however, got", class(col)))
              }
            } else if (!is.null(numPartitions)) {
              # A partition count without any column is not a valid call for
              # range partitioning.
              stop("At least one partition-by column must be specified.")
            } else {
              stop("Please, specify a column(s) or the number of partitions with a column(s)")
            }
            dataFrame(sdf)
          })
762
823
# ' toJSON
763
824
# '
764
825
# ' Converts a SparkDataFrame into a SparkDataFrame of JSON string.
0 commit comments