Skip to content

Commit daef609

Browse files
committed
#795 Use big-endian RDWs when generating VRL file.
1 parent 61cb376 commit daef609

File tree

4 files changed: +7 additions, -5 deletions

cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/processor/impl/CobolProcessorToRdw.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ import java.io.OutputStream
2626
/**
2727
* Implementation of the CobolProcessor trait, responsible for processing COBOL data streams
2828
* by extracting records and applying a user-defined raw record processor. This processor
29-
* converts the input format to the variable record length format with little-endian RDW records.
29+
* converts the input format to the variable record length format with big-endian RDW records.
3030
*
3131
* Please, do not use this class directly. Use `CobolProcessor.builder()` instead.
3232
*

cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/processor/impl/StreamProcessor.scala

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ object StreamProcessor {
7070
}
7171

7272
/**
73-
* Processes a stream of COBOL raw records and writes it back as a variable length format with little-endian RDW headers.
73+
* Processes a stream of COBOL raw records and writes it back as a variable length format with big-endian RDW headers.
7474
*
7575
* @param copybook the COBOL copybook that describes the schema of the records.
7676
* @param options arbitrary options used for splitting input data into records (same as 'spark-cobol' options).
@@ -96,7 +96,7 @@ object StreamProcessor {
9696

9797
val updatedRecord = recordProcessor.processRecord(record, ctx)
9898

99-
val rdw = Array[Byte](0, 0, ((updatedRecord.length) & 0xFF).toByte, (((updatedRecord.length) >> 8) & 0xFF).toByte)
99+
val rdw = Array[Byte](((updatedRecord.length >> 8) & 0xFF).toByte, ((updatedRecord.length) & 0xFF).toByte, 0, 0)
100100

101101
outputStream.write(rdw)
102102
outputStream.write(updatedRecord)

cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/processor/impl/CobolProcessorToRdwSuite.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ class CobolProcessorToRdwSuite extends AnyWordSpec {
5353
assert(count == 4)
5454
assert(outputArray.length == 24)
5555
assert(outputArray.sameElements(
56-
Array(0, 0, 2, 0, -16, -16, 0, 0, 2, 0, -15, -15, 0, 0, 2, 0, -14, -14, 0, 0, 2, 0, -13, -13)
56+
Array(0, 2, 0, 0, -16, -16, 0, 2, 0, 0, -15, -15, 0, 2, 0, 0, -14, -14, 0, 2, 0, 0, -13, -13)
5757
))
5858
}
5959
}

spark-cobol/src/test/scala/za/co/absa/cobrix/spark/cobol/SparkCobolProcessorSuite.scala

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -130,13 +130,15 @@ class SparkCobolProcessorSuite extends AnyWordSpec with SparkTestBase with Binar
130130
val outputData = readBinaryFile(outputFile)
131131

132132
assert(outputData.sameElements(
133-
Array(0, 0, 1, 0, -16, 0, 0, 1, 0, -15, 0, 0, 1, 0, -14, 0, 0, 1, 0, -13).map(_.toByte)
133+
Array(0, 1, 0, 0, -16, 0, 1, 0, 0, -15, 0, 1, 0, 0, -14, 0, 1, 0, 0, -13).map(_.toByte)
134134
))
135135

136136
val actual = spark.read
137137
.format("cobol")
138138
.option("copybook_contents", copybook)
139139
.option("record_format", "V")
140+
.option("is_rdw_big_endian", "true")
141+
.option("pedantic", "true")
140142
.load(outputFile)
141143
.toJSON
142144
.collect()

0 commit comments

Comments (0)