@@ -18,7 +18,6 @@ package za.co.absa.cobrix.spark.cobol.source.integration
1818
1919import org .scalatest .Assertion
2020import org .scalatest .funsuite .AnyFunSuite
21- import org .slf4j .{Logger , LoggerFactory }
2221import za .co .absa .cobrix .cobol .parser .CopybookParser
2322import za .co .absa .cobrix .cobol .parser .policies .DebugFieldsPolicy
2423import za .co .absa .cobrix .spark .cobol .source .base .{SimpleComparisonBase , SparkTestBase }
@@ -30,8 +29,6 @@ import java.nio.file.{Files, Paths}
3029import scala .collection .JavaConverters ._
3130
3231class Test40CompressesFilesSpec extends AnyFunSuite with SparkTestBase with BinaryFileFixture with SimpleComparisonBase {
33- private implicit val logger : Logger = LoggerFactory .getLogger(this .getClass)
34-
// Human-readable label for this suite; usages are outside this view.
private val exampleName = "Test40 (compressed files)"

// Location of the copybook describing the EBCDIC fixture records.
// The URI must start directly with its scheme: leading whitespace makes it unparseable.
private val inputCopybookPath = "file://../data/test40_copybook.cob"
@@ -92,15 +89,41 @@ class Test40CompressesFilesSpec extends AnyFunSuite with SparkTestBase with Bina
9289 succeed
9390 }
9491
95- test(" Test gzip" ) {
/**
  * Reads the gzip-compressed ASCII fixture through the `cobol` data source and verifies
  * both the record count and the record contents.
  *
  * The reader is configured with `record_format = D` and `pedantic = true` first; the entries
  * of `options` are applied afterwards, so a caller-supplied key replaces the default set here.
  *
  * @param options additional reader options applied on top of the defaults.
  * @return the outcome of the final content assertion.
  */
def testAsciiFile(options: Map[String, String]): Assertion = {
  val inputDataPath = "../data/test40_data_ascii/ascii.txt.gz"

  // NOTE(review): the indentation below was restored on the assumption that the copybook
  // parser ignores the first 6 columns of each line (sequence area) - confirm against the
  // parser defaults.
  val copybook =
    """
      |      01  RECORD.
      |          05  DATA    PIC X(5).
      |""".stripMargin

  val df = spark
    .read
    .format("cobol")
    .option("copybook_contents", copybook)
    .option("record_format", "D")
    .option("pedantic", "true")
    .options(options)
    .load(inputDataPath)

  assert(df.count == 3)

  // Sort before collecting so the comparison does not depend on read order.
  val actual = df.orderBy("data")
    .collect()
    .map(_.getString(0))
    .mkString(",")

  assert(actual == "12345,67890,A1234")
}
117+
// Reads a single gzip-compressed EBCDIC data file via the shared helper.
test("Test compressed EBCDIC gzip file") {
  testCompressedFile("../data/test40_data/example.dat.gz")
}
98121
// Reads a single bzip2-compressed EBCDIC data file via the shared helper.
test("Test compressed EBCDIC bzip2 file") {
  testCompressedFile("../data/test40_data/example.dat.bz2")
}
102125
103- test(" read mixed compressed files" ) {
126+ test(" read mixed compressed EBCDIC files" ) {
104127 val inputDataPath = " ../data/test40_data"
105128
106129 val df = spark
@@ -115,4 +138,23 @@ class Test40CompressesFilesSpec extends AnyFunSuite with SparkTestBase with Bina
115138
116139 assert(df.count == 300 )
117140 }
141+
// Compressed ASCII fixture read with record_format = D and no explicit charset.
test("read a compressed ASCII file 1") {
  testAsciiFile(Map(
    "record_format" -> "D"
  ))
}
147+
// Compressed ASCII fixture read with record_format = D and an explicit ISO-8859-1 charset.
test("read a compressed ASCII file 2") {
  testAsciiFile(Map(
    "record_format" -> "D",
    "ascii_charset" -> "ISO-8859-1"
  ))
}
154+
// Compressed ASCII fixture read with record_format = D2, overriding the helper's default of D.
test("read a compressed ASCII file 3") {
  testAsciiFile(Map(
    "record_format" -> "D2"
  ))
}
118160}
0 commit comments