|
11 | 11 | from onetl.connection import MSSQL, Clickhouse, Hive, MySQL, Oracle, Postgres, SparkS3 |
12 | 12 | from onetl.connection.file_connection.s3 import S3 |
13 | 13 | from onetl.db import DBWriter |
14 | | -from onetl.file.format import CSV, JSON, ORC, Excel, JSONLine, Parquet |
| 14 | +from onetl.file.format import CSV, JSON, ORC, XML, Excel, JSONLine, Parquet |
15 | 15 | from pyspark.sql import DataFrame, SparkSession |
16 | 16 | from pyspark.sql.types import ( |
17 | 17 | DateType, |
@@ -114,8 +114,11 @@ def spark(settings: Settings, request: FixtureRequest) -> SparkSession: |
114 | 114 | ) |
115 | 115 |
|
116 | 116 | if "hdfs" in markers or "s3" in markers: |
117 | | - # see supported versions from https://mvnrepository.com/artifact/com.crealytics/spark-excel |
118 | | - maven_packages.extend(Excel.get_packages(spark_version="3.5.1")) |
| 117 | + # excel version is hardcoded due to https://github.com/nightscape/spark-excel/issues/902 |
| 118 | + file_formats_spark_packages: list[str] = XML.get_packages( |
| 119 | + spark_version=pyspark.__version__, |
| 120 | + ) + Excel.get_packages(spark_version="3.5.1") |
| 121 | + maven_packages.extend(file_formats_spark_packages) |
119 | 122 |
|
120 | 123 | if maven_packages: |
121 | 124 | spark = spark.config("spark.jars.packages", ",".join(maven_packages)) |
@@ -845,6 +848,12 @@ def source_file_format(request: FixtureRequest): |
845 | 848 | **params, |
846 | 849 | ) |
847 | 850 |
|
| 851 | + if name == "xml": |
| 852 | + return "xml", XML( |
| 853 | + row_tag="item", |
| 854 | + **params, |
| 855 | + ) |
| 856 | + |
848 | 857 | raise ValueError(f"Unsupported file format: {name}") |
849 | 858 |
|
850 | 859 |
|
@@ -883,6 +892,12 @@ def target_file_format(request: FixtureRequest): |
883 | 892 | **params, |
884 | 893 | ) |
885 | 894 |
|
| 895 | + if name == "xml": |
| 896 | + return "xml", XML( |
| 897 | + row_tag="item", |
| 898 | + **params, |
| 899 | + ) |
| 900 | + |
886 | 901 | raise ValueError(f"Unsupported file format: {name}") |
887 | 902 |
|
888 | 903 |
|
|
0 commit comments