
Commit 0025a83

srowen and HyukjinKwon committed
[SPARK-25908][CORE][SQL] Remove old deprecated items in Spark 3
## What changes were proposed in this pull request?

- Remove some AccumulableInfo.apply() methods
- Remove non-label-specific multiclass precision/recall/fScore in favor of accuracy
- Remove toDegrees/toRadians in favor of degrees/radians (SparkR: only deprecated)
- Remove approxCountDistinct in favor of approx_count_distinct (SparkR: only deprecated)
- Remove unused Python StorageLevel constants
- Remove Dataset.unionAll in favor of union
- Remove unused multiclass option in libsvm parsing
- Remove references to deprecated Spark configs like spark.yarn.am.port
- Remove TaskContext.isRunningLocally
- Remove ShuffleMetrics.shuffle* methods
- Remove BaseReadWrite.context in favor of session
- Remove Column.!== in favor of =!=
- Remove Dataset.explode
- Remove Dataset.registerTempTable
- Remove SQLContext.getOrCreate, setActive, clearActive, constructors

Not touched yet:
- Everything else in MLlib
- HiveContext
- Anything deprecated more recently than 2.0.0, generally

## How was this patch tested?

Existing tests.

Closes apache#22921 from srowen/SPARK-25908.

Lead-authored-by: Sean Owen <[email protected]>
Co-authored-by: hyukjinkwon <[email protected]>
Co-authored-by: Sean Owen <[email protected]>
Signed-off-by: Sean Owen <[email protected]>
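For users upgrading to Spark 3, most of these removals are mechanical renames. Below is a minimal Scala sketch of the user-facing before/after; it is not part of this commit, and `spark`, `df`, and `other` are placeholder values.

```scala
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions._

// Illustrative only: placeholder session and DataFrames.
val spark = SparkSession.builder().appName("migration-sketch").getOrCreate()
val df = spark.range(10).toDF("x")
val other = spark.range(5).toDF("x")

df.union(other)                                   // was: df.unionAll(other)
df.filter(col("x") =!= lit(1))                    // was: col("x") !== lit(1)
df.select(approx_count_distinct(col("x")))        // was: approxCountDistinct(col("x"))
df.select(degrees(col("x")), radians(col("x")))   // was: toDegrees / toRadians
df.createOrReplaceTempView("t")                   // was: df.registerTempTable("t")
// SQLContext.getOrCreate / setActive / clearActive are gone;
// use SparkSession.builder().getOrCreate() instead.
```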
1 parent a8e1c98 commit 0025a83

File tree: 27 files changed (+132, -528 lines)


R/pkg/NAMESPACE
Lines changed: 3 additions & 0 deletions

@@ -194,6 +194,7 @@ exportMethods("%<=>%",
               "acos",
               "add_months",
               "alias",
+              "approx_count_distinct",
               "approxCountDistinct",
               "approxQuantile",
               "array_contains",
@@ -252,6 +253,7 @@ exportMethods("%<=>%",
               "dayofweek",
               "dayofyear",
               "decode",
+              "degrees",
               "dense_rank",
               "desc",
               "element_at",
@@ -334,6 +336,7 @@ exportMethods("%<=>%",
               "posexplode",
               "posexplode_outer",
               "quarter",
+              "radians",
               "rand",
               "randn",
               "rank",

R/pkg/R/functions.R
Lines changed: 64 additions & 9 deletions

@@ -112,7 +112,7 @@ NULL
 #' df <- createDataFrame(cbind(model = rownames(mtcars), mtcars))
 #' tmp <- mutate(df, v1 = log(df$mpg), v2 = cbrt(df$disp),
 #'               v3 = bround(df$wt, 1), v4 = bin(df$cyl),
-#'               v5 = hex(df$wt), v6 = toDegrees(df$gear),
+#'               v5 = hex(df$wt), v6 = degrees(df$gear),
 #'               v7 = atan2(df$cyl, df$am), v8 = hypot(df$cyl, df$am),
 #'               v9 = pmod(df$hp, df$cyl), v10 = shiftLeft(df$disp, 1),
 #'               v11 = conv(df$hp, 10, 16), v12 = sign(df$vs - 0.5),
@@ -320,23 +320,37 @@ setMethod("acos",
           })

 #' @details
-#' \code{approxCountDistinct}: Returns the approximate number of distinct items in a group.
+#' \code{approx_count_distinct}: Returns the approximate number of distinct items in a group.
 #'
 #' @rdname column_aggregate_functions
-#' @aliases approxCountDistinct approxCountDistinct,Column-method
+#' @aliases approx_count_distinct approx_count_distinct,Column-method
 #' @examples
 #'
 #' \dontrun{
-#' head(select(df, approxCountDistinct(df$gear)))
-#' head(select(df, approxCountDistinct(df$gear, 0.02)))
+#' head(select(df, approx_count_distinct(df$gear)))
+#' head(select(df, approx_count_distinct(df$gear, 0.02)))
 #' head(select(df, countDistinct(df$gear, df$cyl)))
 #' head(select(df, n_distinct(df$gear)))
 #' head(distinct(select(df, "gear")))}
+#' @note approx_count_distinct(Column) since 3.0.0
+setMethod("approx_count_distinct",
+          signature(x = "Column"),
+          function(x) {
+            jc <- callJStatic("org.apache.spark.sql.functions", "approx_count_distinct", x@jc)
+            column(jc)
+          })
+
+#' @details
+#' \code{approxCountDistinct}: Returns the approximate number of distinct items in a group.
+#'
+#' @rdname column_aggregate_functions
+#' @aliases approxCountDistinct approxCountDistinct,Column-method
 #' @note approxCountDistinct(Column) since 1.4.0
 setMethod("approxCountDistinct",
           signature(x = "Column"),
           function(x) {
-            jc <- callJStatic("org.apache.spark.sql.functions", "approxCountDistinct", x@jc)
+            .Deprecated("approx_count_distinct")
+            jc <- callJStatic("org.apache.spark.sql.functions", "approx_count_distinct", x@jc)
             column(jc)
           })

@@ -1651,7 +1665,22 @@ setMethod("tanh",
 setMethod("toDegrees",
           signature(x = "Column"),
           function(x) {
-            jc <- callJStatic("org.apache.spark.sql.functions", "toDegrees", x@jc)
+            .Deprecated("degrees")
+            jc <- callJStatic("org.apache.spark.sql.functions", "degrees", x@jc)
+            column(jc)
+          })
+
+#' @details
+#' \code{degrees}: Converts an angle measured in radians to an approximately equivalent angle
+#' measured in degrees.
+#'
+#' @rdname column_math_functions
+#' @aliases degrees degrees,Column-method
+#' @note degrees since 3.0.0
+setMethod("degrees",
+          signature(x = "Column"),
+          function(x) {
+            jc <- callJStatic("org.apache.spark.sql.functions", "degrees", x@jc)
             column(jc)
           })

@@ -1665,7 +1694,22 @@ setMethod("toDegrees",
 setMethod("toRadians",
           signature(x = "Column"),
           function(x) {
-            jc <- callJStatic("org.apache.spark.sql.functions", "toRadians", x@jc)
+            .Deprecated("radians")
+            jc <- callJStatic("org.apache.spark.sql.functions", "radians", x@jc)
+            column(jc)
+          })
+
+#' @details
+#' \code{radians}: Converts an angle measured in degrees to an approximately equivalent angle
+#' measured in radians.
+#'
+#' @rdname column_math_functions
+#' @aliases radians radians,Column-method
+#' @note radians since 3.0.0
+setMethod("radians",
+          signature(x = "Column"),
+          function(x) {
+            jc <- callJStatic("org.apache.spark.sql.functions", "radians", x@jc)
             column(jc)
           })

@@ -2065,13 +2109,24 @@ setMethod("pmod", signature(y = "Column"),

 #' @param rsd maximum estimation error allowed (default = 0.05).
 #'
+#' @rdname column_aggregate_functions
+#' @aliases approx_count_distinct,Column-method
+#' @note approx_count_distinct(Column, numeric) since 3.0.0
+setMethod("approx_count_distinct",
+          signature(x = "Column"),
+          function(x, rsd = 0.05) {
+            jc <- callJStatic("org.apache.spark.sql.functions", "approx_count_distinct", x@jc, rsd)
+            column(jc)
+          })
+
 #' @rdname column_aggregate_functions
 #' @aliases approxCountDistinct,Column-method
 #' @note approxCountDistinct(Column, numeric) since 1.4.0
 setMethod("approxCountDistinct",
           signature(x = "Column"),
           function(x, rsd = 0.05) {
-            jc <- callJStatic("org.apache.spark.sql.functions", "approxCountDistinct", x@jc, rsd)
+            .Deprecated("approx_count_distinct")
+            jc <- callJStatic("org.apache.spark.sql.functions", "approx_count_distinct", x@jc, rsd)
             column(jc)
           })
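The SparkR wrappers above are thin shims: `callJStatic` dispatches to the static methods on `org.apache.spark.sql.functions`. A minimal Scala sketch of the JVM-side targets, with a placeholder session and DataFrame:

```scala
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.{approx_count_distinct, degrees, radians}

// Placeholder session and data, for illustration only.
val spark = SparkSession.builder().master("local[*]").getOrCreate()
val df = spark.range(100).toDF("gear")

df.select(approx_count_distinct(df("gear")))        // default rsd = 0.05
df.select(approx_count_distinct(df("gear"), 0.02))  // caller-supplied error bound
df.select(degrees(df("gear")), radians(df("gear")))
```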

R/pkg/R/generics.R
Lines changed: 12 additions & 0 deletions

@@ -746,6 +746,10 @@ setGeneric("windowOrderBy", function(col, ...) { standardGeneric("windowOrderBy"
 #' @name NULL
 setGeneric("add_months", function(y, x) { standardGeneric("add_months") })

+#' @rdname column_aggregate_functions
+#' @name NULL
+setGeneric("approx_count_distinct", function(x, ...) { standardGeneric("approx_count_distinct") })
+
 #' @rdname column_aggregate_functions
 #' @name NULL
 setGeneric("approxCountDistinct", function(x, ...) { standardGeneric("approxCountDistinct") })
@@ -1287,10 +1291,18 @@ setGeneric("substring_index", function(x, delim, count) { standardGeneric("subst
 #' @name NULL
 setGeneric("sumDistinct", function(x) { standardGeneric("sumDistinct") })

+#' @rdname column_math_functions
+#' @name NULL
+setGeneric("degrees", function(x) { standardGeneric("degrees") })
+
 #' @rdname column_math_functions
 #' @name NULL
 setGeneric("toDegrees", function(x) { standardGeneric("toDegrees") })

+#' @rdname column_math_functions
+#' @name NULL
+setGeneric("radians", function(x) { standardGeneric("radians") })
+
 #' @rdname column_math_functions
 #' @name NULL
 setGeneric("toRadians", function(x) { standardGeneric("toRadians") })

R/pkg/tests/fulltests/test_sparkSQL.R
Lines changed: 2 additions & 2 deletions

@@ -1379,7 +1379,7 @@ test_that("column operators", {

 test_that("column functions", {
   c <- column("a")
-  c1 <- abs(c) + acos(c) + approxCountDistinct(c) + ascii(c) + asin(c) + atan(c)
+  c1 <- abs(c) + acos(c) + approx_count_distinct(c) + ascii(c) + asin(c) + atan(c)
   c2 <- avg(c) + base64(c) + bin(c) + bitwiseNOT(c) + cbrt(c) + ceil(c) + cos(c)
   c3 <- cosh(c) + count(c) + crc32(c) + hash(c) + exp(c)
   c4 <- explode(c) + expm1(c) + factorial(c) + first(c) + floor(c) + hex(c)
@@ -1388,7 +1388,7 @@ test_that("column functions", {
   c7 <- mean(c) + min(c) + month(c) + negate(c) + posexplode(c) + quarter(c)
   c8 <- reverse(c) + rint(c) + round(c) + rtrim(c) + sha1(c) + monotonically_increasing_id()
   c9 <- signum(c) + sin(c) + sinh(c) + size(c) + stddev(c) + soundex(c) + sqrt(c) + sum(c)
-  c10 <- sumDistinct(c) + tan(c) + tanh(c) + toDegrees(c) + toRadians(c)
+  c10 <- sumDistinct(c) + tan(c) + tanh(c) + degrees(c) + radians(c)
   c11 <- to_date(c) + trim(c) + unbase64(c) + unhex(c) + upper(c)
   c12 <- variance(c) + ltrim(c, "a") + rtrim(c, "b") + trim(c, "c")
   c13 <- lead("col", 1) + lead(c, 1) + lag("col", 1) + lag(c, 1)

core/src/main/scala/org/apache/spark/BarrierTaskContext.scala
Lines changed: 0 additions & 2 deletions

@@ -158,8 +158,6 @@ class BarrierTaskContext private[spark] (

   override def isInterrupted(): Boolean = taskContext.isInterrupted()

-  override def isRunningLocally(): Boolean = taskContext.isRunningLocally()
-
   override def addTaskCompletionListener(listener: TaskCompletionListener): this.type = {
     taskContext.addTaskCompletionListener(listener)
     this

core/src/main/scala/org/apache/spark/TaskContext.scala
Lines changed: 0 additions & 7 deletions

@@ -96,13 +96,6 @@ abstract class TaskContext extends Serializable {
    */
   def isInterrupted(): Boolean

-  /**
-   * Returns true if the task is running locally in the driver program.
-   * @return false
-   */
-  @deprecated("Local execution was removed, so this always returns false", "2.0.0")
-  def isRunningLocally(): Boolean
-
   /**
    * Adds a (Java friendly) listener to be executed on task completion.
    * This will be called in all situations - success, failure, or cancellation. Adding a listener
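Because local execution was removed in 2.0.0, `isRunningLocally()` unconditionally returned `false`, so any surviving caller can simply drop the guard. A hedged migration sketch; `doWork` is an illustrative callback, not Spark API:

```scala
// Before Spark 3.0 the guard below was already dead code, because
// TaskContext.isRunningLocally() always returned false after local
// execution was removed in 2.0.0:
//
//   if (!TaskContext.get().isRunningLocally()) doWork()
//
// After this commit the method is gone; run the work unconditionally.
def migrate(doWork: () => Unit): Unit = doWork()
```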

core/src/main/scala/org/apache/spark/TaskContextImpl.scala
Lines changed: 0 additions & 2 deletions

@@ -157,8 +157,6 @@ private[spark] class TaskContextImpl(
   @GuardedBy("this")
   override def isCompleted(): Boolean = synchronized(completed)

-  override def isRunningLocally(): Boolean = false
-
   override def isInterrupted(): Boolean = reasonIfKilled.isDefined

   override def getLocalProperty(key: String): String = localProperties.getProperty(key)

core/src/main/scala/org/apache/spark/executor/ShuffleWriteMetrics.scala
Lines changed: 0 additions & 10 deletions

@@ -56,14 +56,4 @@ class ShuffleWriteMetrics private[spark] () extends Serializable {
   private[spark] def decRecordsWritten(v: Long): Unit = {
     _recordsWritten.setValue(recordsWritten - v)
   }
-
-  // Legacy methods for backward compatibility.
-  // TODO: remove these once we make this class private.
-  @deprecated("use bytesWritten instead", "2.0.0")
-  def shuffleBytesWritten: Long = bytesWritten
-  @deprecated("use writeTime instead", "2.0.0")
-  def shuffleWriteTime: Long = writeTime
-  @deprecated("use recordsWritten instead", "2.0.0")
-  def shuffleRecordsWritten: Long = recordsWritten
-
 }
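The removed `shuffle*` getters were pure aliases, so migration is a rename: `shuffleBytesWritten` becomes `bytesWritten`, `shuffleWriteTime` becomes `writeTime`, and `shuffleRecordsWritten` becomes `recordsWritten`. A sketch of a listener reading the surviving names; `ShuffleWriteLogger` is a hypothetical class, not part of this commit:

```scala
import org.apache.spark.scheduler.{SparkListener, SparkListenerTaskEnd}

// Hypothetical listener illustrating the renamed getters.
class ShuffleWriteLogger extends SparkListener {
  override def onTaskEnd(taskEnd: SparkListenerTaskEnd): Unit = {
    // taskMetrics can be null for failed tasks, so guard with Option.
    Option(taskEnd.taskMetrics).map(_.shuffleWriteMetrics).foreach { m =>
      println(s"bytes=${m.bytesWritten} timeNs=${m.writeTime} records=${m.recordsWritten}")
    }
  }
}
```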

core/src/main/scala/org/apache/spark/scheduler/AccumulableInfo.scala
Lines changed: 0 additions & 30 deletions

@@ -47,33 +47,3 @@ case class AccumulableInfo private[spark] (
     private[spark] val countFailedValues: Boolean,
     // TODO: use this to identify internal task metrics instead of encoding it in the name
     private[spark] val metadata: Option[String] = None)
-
-
-/**
- * A collection of deprecated constructors. This will be removed soon.
- */
-object AccumulableInfo {
-
-  @deprecated("do not create AccumulableInfo", "2.0.0")
-  def apply(
-      id: Long,
-      name: String,
-      update: Option[String],
-      value: String,
-      internal: Boolean): AccumulableInfo = {
-    new AccumulableInfo(
-      id, Option(name), update, Option(value), internal, countFailedValues = false)
-  }
-
-  @deprecated("do not create AccumulableInfo", "2.0.0")
-  def apply(id: Long, name: String, update: Option[String], value: String): AccumulableInfo = {
-    new AccumulableInfo(
-      id, Option(name), None, Option(value), internal = false, countFailedValues = false)
-  }
-
-  @deprecated("do not create AccumulableInfo", "2.0.0")
-  def apply(id: Long, name: String, value: String): AccumulableInfo = {
-    new AccumulableInfo(
-      id, Option(name), None, Option(value), internal = false, countFailedValues = false)
-  }
-}
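With the deprecated `apply` overloads gone, `AccumulableInfo` can no longer be constructed outside Spark; as the deprecation message advised, user code should register accumulators through the `AccumulatorV2` API instead, with `AccumulableInfo` values arriving read-only via listener events. A sketch, assuming an existing `SparkContext` named `sc`:

```scala
import org.apache.spark.SparkContext

// Sketch: `sc` is assumed to be an existing SparkContext.
def countEvens(sc: SparkContext): Long = {
  val evens = sc.longAccumulator("evens")  // named accumulator (AccumulatorV2)
  sc.parallelize(1 to 100).foreach { n =>
    if (n % 2 == 0) evens.add(1)
  }
  evens.value  // driver-side read after the action completes
}
```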

mllib/src/main/scala/org/apache/spark/ml/util/ReadWrite.scala
Lines changed: 0 additions & 22 deletions

@@ -47,18 +47,6 @@ import org.apache.spark.util.{Utils, VersionUtils}
 private[util] sealed trait BaseReadWrite {
   private var optionSparkSession: Option[SparkSession] = None

-  /**
-   * Sets the Spark SQLContext to use for saving/loading.
-   *
-   * @deprecated Use session instead. This method will be removed in 3.0.0.
-   */
-  @Since("1.6.0")
-  @deprecated("Use session instead. This method will be removed in 3.0.0.", "2.0.0")
-  def context(sqlContext: SQLContext): this.type = {
-    optionSparkSession = Option(sqlContext.sparkSession)
-    this
-  }
-
   /**
    * Sets the Spark Session to use for saving/loading.
    */
@@ -215,10 +203,6 @@ abstract class MLWriter extends BaseReadWrite with Logging {
   // override for Java compatibility
   @Since("1.6.0")
   override def session(sparkSession: SparkSession): this.type = super.session(sparkSession)
-
-  // override for Java compatibility
-  @Since("1.6.0")
-  override def context(sqlContext: SQLContext): this.type = super.session(sqlContext.sparkSession)
 }

 /**
@@ -281,9 +265,6 @@ class GeneralMLWriter(stage: PipelineStage) extends MLWriter with Logging {

   // override for Java compatibility
   override def session(sparkSession: SparkSession): this.type = super.session(sparkSession)
-
-  // override for Java compatibility
-  override def context(sqlContext: SQLContext): this.type = super.session(sqlContext.sparkSession)
 }

 /**
@@ -352,9 +333,6 @@ abstract class MLReader[T] extends BaseReadWrite {

   // override for Java compatibility
   override def session(sparkSession: SparkSession): this.type = super.session(sparkSession)
-
-  // override for Java compatibility
-  override def context(sqlContext: SQLContext): this.type = super.session(sqlContext.sparkSession)
 }

 /**
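Callers of the removed `context(sqlContext)` overrides switch to `session(sparkSession)` on the same fluent writer/reader builders, as the deprecation message directed. A hedged round-trip sketch; `spark`, `model`, and `path` are placeholder arguments:

```scala
import org.apache.spark.ml.PipelineModel
import org.apache.spark.sql.SparkSession

// Placeholder arguments; illustrates only the context -> session rename.
def roundTrip(spark: SparkSession, model: PipelineModel, path: String): PipelineModel = {
  model.write.session(spark).overwrite().save(path)  // was: .context(spark.sqlContext)
  PipelineModel.read.session(spark).load(path)       // was: .context(spark.sqlContext)
}
```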
