Skip to content

Commit 27665e7

Browse files
committed
#809 Fix PR suggestions (Thanks @coderabbitai).
1 parent 7cfb193 commit 27665e7

File tree

3 files changed: +10 additions, −9 deletions

cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/reader/VarLenNestedReader.scala

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -153,8 +153,9 @@ class VarLenNestedReader[T: ClassTag](copybookContents: Seq[String],
153153
logger.info(s"Input split size = ${inputSplitSizeRecords.get} records")
154154
} else {
155155
if (inputSplitSizeMB.nonEmpty) {
156-
if (inputSplitSizeMB.get < 1 || inputSplitSizeMB.get > 200000) {
157-
throw new IllegalArgumentException(s"Invalid input split size of ${inputSplitSizeMB.get} MB.")
156+
val maxSplitSizeMB = if (dataStream.isCompressed) 200000 else 2000
157+
if (inputSplitSizeMB.get < 1 || inputSplitSizeMB.get > maxSplitSizeMB) {
158+
throw new IllegalArgumentException(s"Invalid input split size of ${inputSplitSizeMB.get} MB (max allowed: $maxSplitSizeMB MB).")
158159
}
159160
logger.info(s"Input split size = ${inputSplitSizeMB.get} MB")
160161
}

spark-cobol/src/main/scala/za/co/absa/cobrix/spark/cobol/source/streaming/BufferedFSDataInputStream.scala

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -90,9 +90,9 @@ class BufferedFSDataInputStream(filePath: Path, hadoopConfig: Configuration, sta
9090
val available = bufferContainBytes - bufferPos
9191
val bytesToCopy = Math.min(lengthLeft, available)
9292
System.arraycopy(buffer, bufferPos, b, offsetLeft, bytesToCopy)
93-
bufferPos += bufferContainBytes
94-
offsetLeft += bufferContainBytes
95-
lengthLeft -= bufferContainBytes
93+
bufferPos += bytesToCopy
94+
offsetLeft += bytesToCopy
95+
lengthLeft -= bytesToCopy
9696
}
9797
}
9898
}
@@ -136,7 +136,7 @@ class BufferedFSDataInputStream(filePath: Path, hadoopConfig: Configuration, sta
136136

137137
if (startOffset > 0) {
138138
if (codec == null) {
139-
baseStream.seek(startOffset)
139+
fsIn.seek(startOffset)
140140
} else {
141141
var toSkip = startOffset
142142
while (toSkip > 0) {

spark-cobol/src/test/scala/za/co/absa/cobrix/spark/cobol/source/integration/Test40CompressesFilesSpec.scala

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -161,11 +161,11 @@ class Test40CompressesFilesSpec extends AnyFunSuite with SparkTestBase with Bina
161161
}
162162

163163
test("Test compressed EBCDIC gzip file with indexes") {
164-
testCompressedFile("../data/test40_data/example.dat.gz")
164+
testCompressedFile("../data/test40_data/example.dat.gz", useIndexes = true)
165165
}
166166

167-
test("Test compressed EBCDIC bzip2 file with indexes") {
168-
testCompressedFile("../data/test40_data/example.dat.bz2")
167+
test("Test compressed EBCDIC bzip2 file with indexes") {
168+
testCompressedFile("../data/test40_data/example.dat.bz2", useIndexes = true)
169169
}
170170

171171
test("read mixed compressed EBCDIC files") {

0 commit comments

Comments (0)