Skip to content

Commit 8d0f490

Browse files
authored
#28 Bump spark commons version
* Bump spark commons version * Add provided dependencies docs to the readme
1 parent 53c412e commit 8d0f490

File tree

6 files changed

+67
-22
lines changed

6 files changed

+67
-22
lines changed

README.md

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,15 +6,27 @@
66
- Dataframe in
77
- Standardized Dataframe out
88

9-
## Dependency
10-
SBT:
9+
## Usage
10+
11+
### Needed Provided Dependencies
12+
13+
The library needs the following dependencies to be included in your project
14+
15+
```sbt
16+
"org.apache.spark" %% "spark-core" % SPARK_VERSION,
17+
"org.apache.spark" %% "spark-sql" % SPARK_VERSION,
18+
"za.co.absa" %% s"spark-commons-spark${SPARK_MAJOR}.${SPARK_MINOR}" % "0.3.1",
19+
```
20+
21+
### Usage in SBT:
1122
```sbt
1223
"za.co.absa" %% "spark-data-standardization" % VERSION
1324
```
1425

26+
### Usage in Maven
27+
1528
### Scala 2.11 [![Maven Central](https://maven-badges.herokuapp.com/maven-central/za.co.absa/spark-data-standardization_2.11/badge.svg)](https://maven-badges.herokuapp.com/maven-central/za.co.absa/spark-data-standardization_2.11)
1629

17-
Maven
1830
```xml
1931
<dependency>
2032
<groupId>za.co.absa</groupId>

build.sbt

Lines changed: 4 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,9 @@
1313
* See the License for the specific language governing permissions and
1414
* limitations under the License.
1515
*/
16+
1617
import sys.process._
18+
import Dependencies._
1719

1820
ThisBuild / name := "spark-data-standardization"
1921
ThisBuild / organization := "za.co.absa"
@@ -26,24 +28,13 @@ ThisBuild / scalaVersion := scala211
2628

2729
ThisBuild / versionScheme := Some("early-semver")
2830

29-
def sparkVersion(scalaVersion: String): String = if (scalaVersion==scala212) "3.2.1" else "2.4.7"
30-
31-
def sparkFastTestsVersion(scalaVersion: String): String = if (scalaVersion == scala212) "1.1.0" else "0.23.0"
32-
33-
libraryDependencies ++= List(
34-
"org.apache.spark" %% "spark-core" % sparkVersion(scalaVersion.value) % "provided",
35-
"org.apache.spark" %% "spark-sql" % sparkVersion(scalaVersion.value) % "provided",
36-
"za.co.absa" %% "spark-commons" % "0.2.0",
37-
"com.github.mrpowers" %% "spark-fast-tests" % sparkFastTestsVersion(scalaVersion.value) % Test,
38-
"org.scalatest" %% "scalatest" % "3.2.2" % Test,
39-
"com.typesafe" % "config" % "1.4.1"
40-
)
31+
libraryDependencies ++= dependencyList(scalaVersion.value)
4132

4233
lazy val printSparkScalaVersion = taskKey[Unit]("Print Spark and Scala versions for standardization")
4334
ThisBuild / printSparkScalaVersion := {
4435
val log = streams.value.log
4536
val scalaVers = scalaVersion.value
46-
log.info(s"Building with Spark ${sparkVersion(scalaVers)}, Scala ${scalaVers}")
37+
log.info(s"Building with Spark ${getSparkVersion(scalaVers)}, Scala ${scalaVers}")
4738
}
4839

4940
Test / parallelExecution := false

project/Dependencies.scala

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
/*
2+
* Copyright 2022 ABSA Group Limited
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
import sbt._
18+
19+
object Dependencies {

  /**
   * Extracts the "major.minor" prefix of a full Spark version string,
   * e.g. "3.2.1" -> "3.2". Used to pick the matching spark-commons artifact.
   *
   * @throws IllegalArgumentException if the version is not in "x.y.z" form
   */
  private def getSparkVersionUpToMinor(sparkVersion: String): String = {
    val pattern = "([0-9]+)\\.([0-9]+)\\.([0-9]+)".r
    sparkVersion match {
      // `patch` is irrelevant here, so it is discarded with `_`;
      // an explicit match (instead of a refutable `val pattern(...) = ...`)
      // avoids an opaque MatchError on malformed input.
      case pattern(major, minor, _) => s"$major.$minor"
      case _ =>
        throw new IllegalArgumentException(s"Unexpected Spark version format: '$sparkVersion'")
    }
  }

  /** spark-fast-tests version matching the Spark line implied by the Scala version. */
  private def sparkFastTestsVersion(scalaVersion: String): String =
    if (scalaVersion.startsWith("2.11")) "0.23.0" else "1.1.0"

  /** Spark version the build targets: 2.4.x for Scala 2.11, 3.2.x otherwise. */
  def getSparkVersion(scalaVersion: String): String =
    if (scalaVersion.startsWith("2.11")) "2.4.7" else "3.2.1"

  /**
   * Full dependency list for the given Scala version.
   * Spark and spark-commons are `Provided`: consumers of this library must
   * supply them themselves (documented in the README).
   *
   * @param scalaVersion full Scala version string, e.g. "2.11.12"
   * @return module list to feed into `libraryDependencies`
   */
  def dependencyList(scalaVersion: String): Seq[ModuleID] = {
    val sparkVersion = getSparkVersion(scalaVersion)
    val sparkVersionUpToMinor = getSparkVersionUpToMinor(sparkVersion)
    List(
      "org.apache.spark" %% "spark-core" % sparkVersion % Provided,
      "org.apache.spark" %% "spark-sql" % sparkVersion % Provided,
      "za.co.absa" %% s"spark-commons-spark$sparkVersionUpToMinor" % "0.3.1" % Provided,
      "za.co.absa" %% "spark-commons-test" % "0.3.1" % Test,
      "com.typesafe" % "config" % "1.4.1",
      "com.github.mrpowers" %% "spark-fast-tests" % sparkFastTestsVersion(scalaVersion) % Test,
      "org.scalatest" %% "scalatest" % "3.2.2" % Test
    )
  }
}

src/main/scala/za/co/absa/standardization/types/TypedStructField.scala

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,19 +20,18 @@ import java.sql.{Date, Timestamp}
2020
import java.util.Base64
2121

2222
import org.apache.spark.sql.types._
23-
import za.co.absa.spark.commons.implicits.StructFieldImplicits.{StructFieldEnhancements, StructFieldMetadataEnhancements}
23+
import za.co.absa.spark.commons.implicits.StructFieldImplicits.StructFieldMetadataEnhancements
2424
import za.co.absa.standardization.ValidationIssue
2525
import za.co.absa.standardization.numeric.{DecimalSymbols, NumericPattern, Radix}
2626
import za.co.absa.standardization.schema.{MetadataKeys, MetadataValues}
2727
import za.co.absa.standardization.time.DateTimePattern
2828
import za.co.absa.standardization.typeClasses.{DoubleLike, LongLike}
2929
import za.co.absa.standardization.types.parsers._
3030
import za.co.absa.standardization.validation.field._
31-
3231
import scala.util.{Failure, Success, Try}
3332

34-
sealed abstract class TypedStructField(structField: StructField)(implicit defaults: TypeDefaults)
35-
extends StructFieldEnhancements(structField) with Serializable {
33+
sealed abstract class TypedStructField(val structField: StructField)(implicit defaults: TypeDefaults)
34+
extends Serializable {
3635

3736
type BaseType
3837

src/test/scala/za/co/absa/standardization/interpreter/StandardizationInterpreterSuite.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -343,7 +343,7 @@ class StandardizationInterpreterSuite extends AnyFunSuite with SparkTestBase wit
343343

344344

345345
val srcString:String = FileReader.readFileAsString("src/test/resources/data/patients.json")
346-
val src = JsonUtils.getDataFrameFromJson(spark, Seq(srcString))
346+
val src = JsonUtils.getDataFrameFromJson(Seq(srcString))
347347

348348
logDataFrameContent(src)
349349

src/test/scala/za/co/absa/standardization/interpreter/StandardizationInterpreter_ArraySuite.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -208,7 +208,7 @@ class StandardizationInterpreter_ArraySuite extends AnyFunSuite with SparkTestBa
208208
val seq = Seq(
209209
s"""{"$fieldName": [["a", "bb", "ccc"],["1", "12"],["Hello", null, "World"]]}"""
210210
)
211-
val src = JsonUtils.getDataFrameFromJson(spark, seq)
211+
val src = JsonUtils.getDataFrameFromJson(seq)
212212

213213
val subArrayJson = """{"type": "array", "elementType": "string", "containsNull": false}"""
214214
val desiredSchema = generateDesiredSchema(subArrayJson, s""""${MetadataKeys.DefaultValue}": "Nope"""")

0 commit comments

Comments
 (0)