Skip to content

Commit 0b3aa3e

Browse files
committed
#809 Add a test suite for reading mixed compressed and uncompressed ASCII files.
1 parent 54b2abd commit 0b3aa3e

File tree

3 files changed

+54
-4
lines changed

3 files changed

+54
-4
lines changed

data/test40_data_ascii/ascii.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
12345
2+
67890
3+
A1234
44 Bytes
Binary file not shown.

spark-cobol/src/test/scala/za/co/absa/cobrix/spark/cobol/source/integration/Test40CompressesFilesSpec.scala

Lines changed: 51 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,7 @@ class Test40CompressesFilesSpec extends AnyFunSuite with SparkTestBase with Bina
8989
succeed
9090
}
9191

92-
def testAsciiFile(options: Map[String, String]): Assertion = {
92+
def testCompressedAsciiFile(options: Map[String, String]): Assertion = {
9393
val inputDataPath = "../data/test40_data_ascii/ascii.txt.gz"
9494

9595
val df = spark
@@ -115,6 +115,32 @@ class Test40CompressesFilesSpec extends AnyFunSuite with SparkTestBase with Bina
115115
assert(actual == "12345,67890,A1234")
116116
}
117117

118+
/**
  * Reads every file under the ASCII test data directory in one load and checks
  * that the combined record set is the three expected values, each appearing twice.
  *
  * The directory is expected to hold both `ascii.txt` and `ascii.txt.gz`, which is
  * why six records are asserted rather than three.
  *
  * @param options extra reader options; applied after the defaults set here, so
  *                they take precedence over `record_format` and `pedantic`.
  * @return the result of the final content assertion.
  */
def testMixedAsciiFiles(options: Map[String, String]): Assertion = {
  val inputDataPath = "../data/test40_data_ascii/"

  // Single 5-character alphanumeric field per record.
  val copybook =
    """
      | 01 RECORD.
      | 05 DATA PIC X(5).
      |""".stripMargin

  val reader = spark
    .read
    .format("cobol")
    .option("copybook_contents", copybook)
    .option("record_format", "D")
    .option("pedantic", "true")
    .options(options)

  val df = reader.load(inputDataPath)

  // Three records per file, two files in the directory.
  assert(df.count == 6)

  // Sort so the comparison does not depend on the order files are read in.
  val sortedRows = df.orderBy("data").collect()
  val actual = sortedRows.map(row => row.getString(0)).mkString(",")

  assert(actual == "12345,12345,67890,67890,A1234,A1234")
}
143+
118144
test("Test compressed EBCDIC gzip file") {
119145
testCompressedFile("../data/test40_data/example.dat.gz")
120146
}
@@ -140,21 +166,42 @@ class Test40CompressesFilesSpec extends AnyFunSuite with SparkTestBase with Bina
140166
}
141167

142168
test("read a compressed ASCII file 1") {
143-
testAsciiFile(Map(
169+
testCompressedAsciiFile(Map(
144170
"record_format" -> "D"
145171
))
146172
}
147173

148174
test("read a compressed ASCII file 2") {
149-
testAsciiFile(Map(
175+
testCompressedAsciiFile(Map(
150176
"record_format" -> "D",
151177
"ascii_charset" -> "ISO-8859-1"
152178
))
153179
}
154180

155181
test("read a compressed ASCII file 3") {
156-
testAsciiFile(Map(
182+
testCompressedAsciiFile(Map(
183+
"record_format" -> "D2"
184+
))
185+
}
186+
187+
// Mixed plain + compressed ASCII input read with the basic "D" record format.
test("read a mixed ASCII files 1") {
  val readerOptions = Map("record_format" -> "D")

  testMixedAsciiFiles(readerOptions)
}
192+
193+
// Mixed plain + compressed ASCII input read with an explicit ASCII charset.
test("read a mixed ASCII files 2") {
  val readerOptions = Map(
    "record_format" -> "D",
    "ascii_charset" -> "ISO-8859-1"
  )

  testMixedAsciiFiles(readerOptions)
}
199+
200+
// Mixed plain + compressed ASCII input read with the "D2" record format.
test("read a mixed ASCII files 3") {
  val readerOptions = Map("record_format" -> "D2")

  testMixedAsciiFiles(readerOptions)
}
160205
}
206+
207+

0 commit comments

Comments
 (0)