Skip to content

Commit fe9f0c0

Browse files
authored
Merge pull request #295 from ing-bank/test_spark4_scala213
Use spark4 with scala213
2 parents b9d7b00 + ce8117c commit fe9f0c0

File tree

10 files changed

+27
-20
lines changed

10 files changed

+27
-20
lines changed

.github/workflows/build.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,12 @@ jobs:
5252
python: "3.12"
5353
os: ubuntu-latest
5454
dependency_constraints: '"pandas>=2" "numpy>=2"'
55+
- SPARK_VERSION: "4.0.0"
56+
HADOOP_VERSION: "3"
57+
JAVA_VERSION: "17"
58+
python: "3.12"
59+
os: ubuntu-latest
60+
dependency_constraints: '"pandas>=2" "numpy>=2"'
5561
runs-on: ${{ matrix.os }}
5662
name: ${{ matrix.os }}, Spark ${{ matrix.SPARK_VERSION}}, Python ${{ matrix.python }}
5763

README.rst

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -17,26 +17,27 @@ It works with numerical, ordinal, categorical features, and the histograms can b
1717
as trends, shifts, peaks, outliers, anomalies, changing correlations, etc,
1818
using monitoring business rules.
1919

20+
Latest update: Sep 2025.
21+
2022
|example|
2123

2224
|histograms|
2325

24-
Announcements
25-
=============
26-
27-
Spark 3.0
28-
---------
26+
Spark
27+
=====
2928

30-
With Spark 3.0, based on Scala 2.12, make sure to pick up the correct `histogrammar` jar files:
29+
For Spark, make sure to pick up the `histogrammar` jar files that match your Scala version. Spark 4.X is based on Scala 2.13; Spark 3.X is based on Scala 2.12 or 2.13.
3130

3231
.. code-block:: python
3332
3433
spark = SparkSession.builder.config(
3534
"spark.jars.packages",
36-
"io.github.histogrammar:histogrammar_2.12:1.0.20,io.github.histogrammar:histogrammar-sparksql_2.12:1.0.20",
35+
"io.github.histogrammar:histogrammar_2.13:1.0.30,io.github.histogrammar:histogrammar-sparksql_2.13:1.0.30",
3736
).getOrCreate()
3837
39-
For Spark 2.X compiled against scala 2.11, in the string above simply replace 2.12 with 2.11.
38+
39+
For Scala 2.12, replace both occurrences of "2.13" in the string above with "2.12".
40+
4041

4142
Examples
4243
========

popmon/notebooks/popmon_tutorial_advanced.ipynb

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -175,9 +175,9 @@
175175
"outputs": [],
176176
"source": [
177177
"if pyspark_installed:\n",
178-
" scala = \"2.12\" if int(pyspark_version[0]) >= 3 else \"2.11\"\n",
179-
" hist_jar = f\"io.github.histogrammar:histogrammar_{scala}:1.0.20\"\n",
180-
" hist_spark_jar = f\"io.github.histogrammar:histogrammar-sparksql_{scala}:1.0.20\"\n",
178+
" scala = \"2.12\" if int(pyspark_version[0]) == 3 else \"2.13\"\n",
179+
" hist_jar = f\"io.github.histogrammar:histogrammar_{scala}:1.0.30\"\n",
180+
" hist_spark_jar = f\"io.github.histogrammar:histogrammar-sparksql_{scala}:1.0.30\"\n",
181181
"\n",
182182
" spark = SparkSession.builder.config(\n",
183183
" \"spark.jars.packages\", f\"{hist_spark_jar},{hist_jar}\"\n",

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ dependencies = [
2424
"numpy>=1.18.0",
2525
"pandas>=0.25.1",
2626
"scipy>=1.5.2",
27-
"histogrammar>=1.1.1",
27+
"histogrammar>=1.1.2",
2828
"phik",
2929
"jinja2",
3030
"tqdm",
34 KB
Binary file not shown.
34.1 KB
Binary file not shown.
773 KB
Binary file not shown.
771 KB
Binary file not shown.

tests/popmon/spark/test_spark.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,9 @@ def spark_context():
2323

2424
current_path = Path(__file__).parent
2525

26-
scala = "2.12" if int(pyspark_version[0]) >= 3 else "2.11"
27-
hist_spark_jar = current_path / f"jars/histogrammar-sparksql_{scala}-1.0.11.jar"
28-
hist_jar = current_path / f"jars/histogrammar_{scala}-1.0.11.jar"
26+
scala = "2.12" if int(pyspark_version[0]) == 3 else "2.13"
27+
hist_spark_jar = current_path / f"jars/histogrammar-sparksql_{scala}-1.0.30.jar"
28+
hist_jar = current_path / f"jars/histogrammar_{scala}-1.0.30.jar"
2929

3030
spark = (
3131
SparkSession.builder.master("local")
@@ -115,10 +115,10 @@ def test_spark_make_histograms(spark_context):
115115
# backwards compatibility
116116
for name in names:
117117
v1 = deepcopy(getattr(pytest, name))
118-
v1["data"]["name"] = f"'{name}'"
118+
v1["data"]["name"] = f"{name}"
119119

120120
v2 = deepcopy(getattr(pytest, name))
121-
v2["data"]["name"] = f"b'{name}'"
121+
v2["data"]["name"] = f"b{name}"
122122

123123
output = current_hists[name].toJson()
124124
assert output in (v1, v2)

tests/popmon/spark/test_split_dataset_spark.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,9 @@ def spark_context():
2222

2323
current_path = Path(__file__).parent
2424

25-
scala = "2.12" if int(pyspark_version[0]) >= 3 else "2.11"
26-
hist_spark_jar = current_path / f"jars/histogrammar-sparksql_{scala}-1.0.11.jar"
27-
hist_jar = current_path / f"jars/histogrammar_{scala}-1.0.11.jar"
25+
scala = "2.12" if int(pyspark_version[0]) == 3 else "2.13"
26+
hist_spark_jar = current_path / f"jars/histogrammar-sparksql_{scala}-1.0.30.jar"
27+
hist_jar = current_path / f"jars/histogrammar_{scala}-1.0.30.jar"
2828

2929
spark = (
3030
SparkSession.builder.master("local")

0 commit comments

Comments
 (0)