
Commit c5daccb

katrinleinweber authored and srowen committed
[MINOR] Update all DOI links to preferred resolver
## What changes were proposed in this pull request?

The DOI foundation recommends [this new resolver](https://www.doi.org/doi_handbook/3_Resolution.html#3.8). Accordingly, this PR re-`sed`s all static DOI links ;-)

## How was this patch tested?

It wasn't, since it seems as safe as a "[typo fix](https://spark.apache.org/contributing.html)". In case any of the files are included from other projects and should be updated there, please let me know.

Closes apache#23129 from katrinleinweber/resolve-DOIs-securely.

Authored-by: Katrin Leinweber <[email protected]>
Signed-off-by: Sean Owen <[email protected]>
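The PR body says the links were re-`sed`-ed but does not show the command used. A minimal sketch of such a rewrite, assuming an extended-regex `sed` invocation and a hypothetical sample file (the exact regex, flags, and file scope in the real PR are unknown):

```shell
# Hypothetical reconstruction of the link rewrite; the actual command used in
# the PR is not shown, so the regex and flags below are assumptions.
tmp="$(mktemp -d)"
cat > "$tmp/doc.md" <<'EOF'
See <a href="http://dx.doi.org/10.1145/2452376.2452456">here</a> and
[PAVA](http://doi.org/10.1198/TECH.2010.10111).
EOF

# Rewrite both the legacy dx.doi.org host and plain-http doi.org links to the
# recommended https://doi.org resolver. -i.bak keeps a backup file and is
# accepted by both GNU and BSD sed.
sed -i.bak -E 's#http://(dx\.)?doi\.org/#https://doi.org/#g' "$tmp/doc.md"

# Sanity check: no insecure DOI links should remain after the rewrite.
if grep -E 'http://(dx\.)?doi\.org' "$tmp/doc.md"; then
  echo "insecure DOI links remain"
else
  echo "all DOI links updated"
fi
```

Running the same `grep` over the repository before committing would be one way to confirm no `http://` DOI links were missed, which the PR itself notes was not done.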
1 parent 41d5aae commit c5daccb

File tree

29 files changed: +54 −54 lines changed

R/pkg/R/stats.R

Lines changed: 2 additions & 2 deletions
@@ -109,7 +109,7 @@ setMethod("corr",
 #'
 #' Finding frequent items for columns, possibly with false positives.
 #' Using the frequent element count algorithm described in
-#' \url{http://dx.doi.org/10.1145/762471.762473}, proposed by Karp, Schenker, and Papadimitriou.
+#' \url{https://doi.org/10.1145/762471.762473}, proposed by Karp, Schenker, and Papadimitriou.
 #'
 #' @param x A SparkDataFrame.
 #' @param cols A vector column names to search frequent items in.
@@ -143,7 +143,7 @@ setMethod("freqItems", signature(x = "SparkDataFrame", cols = "character"),
 #' *exact* rank of x is close to (p * N). More precisely,
 #' floor((p - err) * N) <= rank(x) <= ceil((p + err) * N).
 #' This method implements a variation of the Greenwald-Khanna algorithm (with some speed
-#' optimizations). The algorithm was first present in [[http://dx.doi.org/10.1145/375663.375670
+#' optimizations). The algorithm was first present in [[https://doi.org/10.1145/375663.375670
 #' Space-efficient Online Computation of Quantile Summaries]] by Greenwald and Khanna.
 #' Note that NA values will be ignored in numerical columns before calculation. For
 #' columns only containing NA values, an empty list is returned.

core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala

Lines changed: 3 additions & 3 deletions
@@ -952,7 +952,7 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])
    *
    * The algorithm used is based on streamlib's implementation of "HyperLogLog in Practice:
    * Algorithmic Engineering of a State of The Art Cardinality Estimation Algorithm", available
-   * <a href="http://dx.doi.org/10.1145/2452376.2452456">here</a>.
+   * <a href="https://doi.org/10.1145/2452376.2452456">here</a>.
    *
    * @param relativeSD Relative accuracy. Smaller values create counters that require more space.
    *                   It must be greater than 0.000017.
@@ -969,7 +969,7 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])
    *
    * The algorithm used is based on streamlib's implementation of "HyperLogLog in Practice:
    * Algorithmic Engineering of a State of The Art Cardinality Estimation Algorithm", available
-   * <a href="http://dx.doi.org/10.1145/2452376.2452456">here</a>.
+   * <a href="https://doi.org/10.1145/2452376.2452456">here</a>.
    *
    * @param relativeSD Relative accuracy. Smaller values create counters that require more space.
    *                   It must be greater than 0.000017.
@@ -985,7 +985,7 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])
    *
    * The algorithm used is based on streamlib's implementation of "HyperLogLog in Practice:
    * Algorithmic Engineering of a State of The Art Cardinality Estimation Algorithm", available
-   * <a href="http://dx.doi.org/10.1145/2452376.2452456">here</a>.
+   * <a href="https://doi.org/10.1145/2452376.2452456">here</a>.
    *
    * @param relativeSD Relative accuracy. Smaller values create counters that require more space.
    *                   It must be greater than 0.000017.

core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala

Lines changed: 1 addition & 1 deletion
@@ -685,7 +685,7 @@ trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends Serializable {
    *
    * The algorithm used is based on streamlib's implementation of "HyperLogLog in Practice:
    * Algorithmic Engineering of a State of The Art Cardinality Estimation Algorithm", available
-   * <a href="http://dx.doi.org/10.1145/2452376.2452456">here</a>.
+   * <a href="https://doi.org/10.1145/2452376.2452456">here</a>.
    *
    * @param relativeSD Relative accuracy. Smaller values create counters that require more space.
    *                   It must be greater than 0.000017.

core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala

Lines changed: 4 additions & 4 deletions
@@ -394,7 +394,7 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)])
    *
    * The algorithm used is based on streamlib's implementation of "HyperLogLog in Practice:
    * Algorithmic Engineering of a State of The Art Cardinality Estimation Algorithm", available
-   * <a href="http://dx.doi.org/10.1145/2452376.2452456">here</a>.
+   * <a href="https://doi.org/10.1145/2452376.2452456">here</a>.
    *
    * The relative accuracy is approximately `1.054 / sqrt(2^p)`. Setting a nonzero (`sp` is
    * greater than `p`) would trigger sparse representation of registers, which may reduce the
@@ -436,7 +436,7 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)])
    *
    * The algorithm used is based on streamlib's implementation of "HyperLogLog in Practice:
    * Algorithmic Engineering of a State of The Art Cardinality Estimation Algorithm", available
-   * <a href="http://dx.doi.org/10.1145/2452376.2452456">here</a>.
+   * <a href="https://doi.org/10.1145/2452376.2452456">here</a>.
    *
    * @param relativeSD Relative accuracy. Smaller values create counters that require more space.
    *                   It must be greater than 0.000017.
@@ -456,7 +456,7 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)])
    *
    * The algorithm used is based on streamlib's implementation of "HyperLogLog in Practice:
    * Algorithmic Engineering of a State of The Art Cardinality Estimation Algorithm", available
-   * <a href="http://dx.doi.org/10.1145/2452376.2452456">here</a>.
+   * <a href="https://doi.org/10.1145/2452376.2452456">here</a>.
    *
    * @param relativeSD Relative accuracy. Smaller values create counters that require more space.
    *                   It must be greater than 0.000017.
@@ -473,7 +473,7 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)])
    *
    * The algorithm used is based on streamlib's implementation of "HyperLogLog in Practice:
    * Algorithmic Engineering of a State of The Art Cardinality Estimation Algorithm", available
-   * <a href="http://dx.doi.org/10.1145/2452376.2452456">here</a>.
+   * <a href="https://doi.org/10.1145/2452376.2452456">here</a>.
    *
    * @param relativeSD Relative accuracy. Smaller values create counters that require more space.
    *                   It must be greater than 0.000017.

core/src/main/scala/org/apache/spark/rdd/RDD.scala

Lines changed: 2 additions & 2 deletions
@@ -1258,7 +1258,7 @@ abstract class RDD[T: ClassTag](
    *
    * The algorithm used is based on streamlib's implementation of "HyperLogLog in Practice:
    * Algorithmic Engineering of a State of The Art Cardinality Estimation Algorithm", available
-   * <a href="http://dx.doi.org/10.1145/2452376.2452456">here</a>.
+   * <a href="https://doi.org/10.1145/2452376.2452456">here</a>.
    *
    * The relative accuracy is approximately `1.054 / sqrt(2^p)`. Setting a nonzero (`sp` is greater
    * than `p`) would trigger sparse representation of registers, which may reduce the memory
@@ -1290,7 +1290,7 @@ abstract class RDD[T: ClassTag](
    *
    * The algorithm used is based on streamlib's implementation of "HyperLogLog in Practice:
    * Algorithmic Engineering of a State of The Art Cardinality Estimation Algorithm", available
-   * <a href="http://dx.doi.org/10.1145/2452376.2452456">here</a>.
+   * <a href="https://doi.org/10.1145/2452376.2452456">here</a>.
    *
    * @param relativeSD Relative accuracy. Smaller values create counters that require more space.
    *                   It must be greater than 0.000017.

docs/ml-classification-regression.md

Lines changed: 2 additions & 2 deletions
@@ -941,9 +941,9 @@ Essentially isotonic regression is a
 best fitting the original data points.
 
 We implement a
-[pool adjacent violators algorithm](http://doi.org/10.1198/TECH.2010.10111)
+[pool adjacent violators algorithm](https://doi.org/10.1198/TECH.2010.10111)
 which uses an approach to
-[parallelizing isotonic regression](http://doi.org/10.1007/978-3-642-99789-1_10).
+[parallelizing isotonic regression](https://doi.org/10.1007/978-3-642-99789-1_10).
 The training input is a DataFrame which contains three columns
 label, features and weight. Additionally, IsotonicRegression algorithm has one
 optional parameter called $isotonic$ defaulting to true.

docs/ml-collaborative-filtering.md

Lines changed: 2 additions & 2 deletions
@@ -41,7 +41,7 @@ for example, users giving ratings to movies.
 
 It is common in many real-world use cases to only have access to *implicit feedback* (e.g. views,
 clicks, purchases, likes, shares etc.). The approach used in `spark.ml` to deal with such data is taken
-from [Collaborative Filtering for Implicit Feedback Datasets](http://dx.doi.org/10.1109/ICDM.2008.22).
+from [Collaborative Filtering for Implicit Feedback Datasets](https://doi.org/10.1109/ICDM.2008.22).
 Essentially, instead of trying to model the matrix of ratings directly, this approach treats the data
 as numbers representing the *strength* in observations of user actions (such as the number of clicks,
 or the cumulative duration someone spent viewing a movie). Those numbers are then related to the level of
@@ -55,7 +55,7 @@ We scale the regularization parameter `regParam` in solving each least squares p
 the number of ratings the user generated in updating user factors,
 or the number of ratings the product received in updating product factors.
 This approach is named "ALS-WR" and discussed in the paper
-"[Large-Scale Parallel Collaborative Filtering for the Netflix Prize](http://dx.doi.org/10.1007/978-3-540-68880-8_32)".
+"[Large-Scale Parallel Collaborative Filtering for the Netflix Prize](https://doi.org/10.1007/978-3-540-68880-8_32)".
 It makes `regParam` less dependent on the scale of the dataset, so we can apply the
 best parameter learned from a sampled subset to the full dataset and expect similar performance.

docs/ml-frequent-pattern-mining.md

Lines changed: 4 additions & 4 deletions
@@ -18,15 +18,15 @@ for more information.
 ## FP-Growth
 
 The FP-growth algorithm is described in the paper
-[Han et al., Mining frequent patterns without candidate generation](http://dx.doi.org/10.1145/335191.335372),
+[Han et al., Mining frequent patterns without candidate generation](https://doi.org/10.1145/335191.335372),
 where "FP" stands for frequent pattern.
 Given a dataset of transactions, the first step of FP-growth is to calculate item frequencies and identify frequent items.
 Different from [Apriori-like](http://en.wikipedia.org/wiki/Apriori_algorithm) algorithms designed for the same purpose,
 the second step of FP-growth uses a suffix tree (FP-tree) structure to encode transactions without generating candidate sets
 explicitly, which are usually expensive to generate.
 After the second step, the frequent itemsets can be extracted from the FP-tree.
 In `spark.mllib`, we implemented a parallel version of FP-growth called PFP,
-as described in [Li et al., PFP: Parallel FP-growth for query recommendation](http://dx.doi.org/10.1145/1454008.1454027).
+as described in [Li et al., PFP: Parallel FP-growth for query recommendation](https://doi.org/10.1145/1454008.1454027).
 PFP distributes the work of growing FP-trees based on the suffixes of transactions,
 and hence is more scalable than a single-machine implementation.
 We refer users to the papers for more details.
@@ -90,7 +90,7 @@ Refer to the [R API docs](api/R/spark.fpGrowth.html) for more details.
 
 PrefixSpan is a sequential pattern mining algorithm described in
 [Pei et al., Mining Sequential Patterns by Pattern-Growth: The
-PrefixSpan Approach](http://dx.doi.org/10.1109%2FTKDE.2004.77). We refer
+PrefixSpan Approach](https://doi.org/10.1109%2FTKDE.2004.77). We refer
 the reader to the referenced paper for formalizing the sequential
 pattern mining problem.
 
@@ -137,4 +137,4 @@ Refer to the [R API docs](api/R/spark.prefixSpan.html) for more details.
 {% include_example r/ml/prefixSpan.R %}
 </div>
 
-</div>
+</div>

docs/mllib-collaborative-filtering.md

Lines changed: 2 additions & 2 deletions
@@ -37,7 +37,7 @@ for example, users giving ratings to movies.
 
 It is common in many real-world use cases to only have access to *implicit feedback* (e.g. views,
 clicks, purchases, likes, shares etc.). The approach used in `spark.mllib` to deal with such data is taken
-from [Collaborative Filtering for Implicit Feedback Datasets](http://dx.doi.org/10.1109/ICDM.2008.22).
+from [Collaborative Filtering for Implicit Feedback Datasets](https://doi.org/10.1109/ICDM.2008.22).
 Essentially, instead of trying to model the matrix of ratings directly, this approach treats the data
 as numbers representing the *strength* in observations of user actions (such as the number of clicks,
 or the cumulative duration someone spent viewing a movie). Those numbers are then related to the level of
@@ -51,7 +51,7 @@ Since v1.1, we scale the regularization parameter `lambda` in solving each least
 the number of ratings the user generated in updating user factors,
 or the number of ratings the product received in updating product factors.
 This approach is named "ALS-WR" and discussed in the paper
-"[Large-Scale Parallel Collaborative Filtering for the Netflix Prize](http://dx.doi.org/10.1007/978-3-540-68880-8_32)".
+"[Large-Scale Parallel Collaborative Filtering for the Netflix Prize](https://doi.org/10.1007/978-3-540-68880-8_32)".
 It makes `lambda` less dependent on the scale of the dataset, so we can apply the
 best parameter learned from a sampled subset to the full dataset and expect similar performance.
docs/mllib-frequent-pattern-mining.md

Lines changed: 3 additions & 3 deletions
@@ -15,15 +15,15 @@ a popular algorithm to mining frequent itemsets.
 ## FP-growth
 
 The FP-growth algorithm is described in the paper
-[Han et al., Mining frequent patterns without candidate generation](http://dx.doi.org/10.1145/335191.335372),
+[Han et al., Mining frequent patterns without candidate generation](https://doi.org/10.1145/335191.335372),
 where "FP" stands for frequent pattern.
 Given a dataset of transactions, the first step of FP-growth is to calculate item frequencies and identify frequent items.
 Different from [Apriori-like](http://en.wikipedia.org/wiki/Apriori_algorithm) algorithms designed for the same purpose,
 the second step of FP-growth uses a suffix tree (FP-tree) structure to encode transactions without generating candidate sets
 explicitly, which are usually expensive to generate.
 After the second step, the frequent itemsets can be extracted from the FP-tree.
 In `spark.mllib`, we implemented a parallel version of FP-growth called PFP,
-as described in [Li et al., PFP: Parallel FP-growth for query recommendation](http://dx.doi.org/10.1145/1454008.1454027).
+as described in [Li et al., PFP: Parallel FP-growth for query recommendation](https://doi.org/10.1145/1454008.1454027).
 PFP distributes the work of growing FP-trees based on the suffixes of transactions,
 and hence more scalable than a single-machine implementation.
 We refer users to the papers for more details.
@@ -122,7 +122,7 @@ Refer to the [`AssociationRules` Java docs](api/java/org/apache/spark/mllib/fpm/
 
 PrefixSpan is a sequential pattern mining algorithm described in
 [Pei et al., Mining Sequential Patterns by Pattern-Growth: The
-PrefixSpan Approach](http://dx.doi.org/10.1109%2FTKDE.2004.77). We refer
+PrefixSpan Approach](https://doi.org/10.1109%2FTKDE.2004.77). We refer
 the reader to the referenced paper for formalizing the sequential
 pattern mining problem.
