diff --git a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/Copybook.scala b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/Copybook.scala index f8d9781cd..fc4207dcc 100644 --- a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/Copybook.scala +++ b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/Copybook.scala @@ -26,6 +26,7 @@ import scala.collection.mutable.ArrayBuffer class Copybook(val ast: CopybookAST) extends Logging with Serializable { + import Copybook._ def getCobolSchema: CopybookAST = ast @@ -215,38 +216,6 @@ class Copybook(val ast: CopybookAST) extends Logging with Serializable { field.decodeTypeValue(0, slicedBytes) } - /** - * Set value of a field of the copybook record by the AST object of the field - * - * Nested field names can contain '.' to identify the exact field. - * If the field name is unique '.' is not required. - * - * @param field The AST object of the field - * @param bytes Binary encoded data of the record - * @param startOffset An offset to the beginning of the field in the data (in bytes). 
- * @return The value of the field - * - */ - def setPrimitiveField(field: Primitive, recordBytes: Array[Byte], value: Any, startOffset: Int = 0): Unit = { - field.encode match { - case Some(encode) => - val fieldBytes = encode(value) - val startByte = field.binaryProperties.offset + startOffset - val endByte = field.binaryProperties.offset + startOffset + field.binaryProperties.actualSize - - if (startByte < 0 || endByte > recordBytes.length) { - throw new IllegalArgumentException(s"Cannot set value for field '${field.name}' because the field is out of bounds of the record.") - } - if (fieldBytes.length != field.binaryProperties.dataSize) { - throw new IllegalArgumentException(s"Cannot set value for field '${field.name}' because the encoded value has a different size than the field size.") - } - - System.arraycopy(fieldBytes, 0, recordBytes, startByte, fieldBytes.length) - case None => - throw new IllegalStateException(s"Cannot set value for field '${field.name}' because it does not have an encoder defined.") - } - } - /** This routine is used for testing by generating a layout position information to compare with mainframe output */ def generateRecordLayoutPositions(): String = { var fieldCounter: Int = 0 @@ -442,4 +411,36 @@ object Copybook { new Copybook(schema) } + + /** + * Set value of a field of the copybook record by the AST object of the field + * + * Nested field names can contain '.' to identify the exact field. + * If the field name is unique '.' is not required. + * + * @param field The AST object of the field + * @param recordBytes Binary encoded data of the record + * @param startOffset An offset to the beginning of the field in the data (in bytes). 
+ * @param value The value to encode and write into the field + * + */ + def setPrimitiveField(field: Primitive, recordBytes: Array[Byte], value: Any, startOffset: Int = 0): Unit = { + field.encode match { + case Some(encode) => + val fieldBytes = encode(value) + val startByte = field.binaryProperties.offset + startOffset + val endByte = field.binaryProperties.offset + startOffset + field.binaryProperties.actualSize + + if (startByte < 0 || endByte > recordBytes.length) { + throw new IllegalArgumentException(s"Cannot set value for field '${field.name}' because the field is out of bounds of the record.") + } + if (fieldBytes.length != field.binaryProperties.dataSize) { + throw new IllegalArgumentException(s"Cannot set value for field '${field.name}' because the encoded value has a different size than the field size.") + } + + System.arraycopy(fieldBytes, 0, recordBytes, startByte, fieldBytes.length) + case None => + throw new IllegalStateException(s"Cannot set value for field '${field.name}' because it does not have an encoder defined.") + } + } } \ No newline at end of file diff --git a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/BCDNumberEncoders.scala b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/BCDNumberEncoders.scala new file mode 100644 index 000000000..b5d9582d2 --- /dev/null +++ b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/BCDNumberEncoders.scala @@ -0,0 +1,108 @@ +/* + * Copyright 2018 ABSA Group Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package za.co.absa.cobrix.cobol.parser.encoding + +import java.math.RoundingMode + +object BCDNumberEncoders { + /** + * Encode a number as a binary encoded decimal (BCD) aka COMP-3 format to an array of bytes. + * + * Output length (bytes): + * - With mandatory sign nibble (signed or unsigned): ceil((precision + 1) / 2) + * - Unsigned without sign nibble: ceil(precision / 2). + * + * @param number The number to encode. + * @param precision Total number of digits in the number. + * @param scale A decimal scale if a number is a decimal. Should be greater or equal to zero. + * @param scaleFactor Additional zeros to be added before or after the decimal point. + * @param signed If true, sign nibble is added and negative numbers are supported. + * @param mandatorySignNibble If true, the BCD number should contain the sign nibble. Otherwise, the number is + * considered unsigned, and negative numbers are encoded as null (zero bytes). + * @return A BCD representation of the number, array of zero bytes if the data is not properly formatted. 
+ */ + def encodeBCDNumber(number: java.math.BigDecimal, + precision: Int, + scale: Int, + scaleFactor: Int, + signed: Boolean, + mandatorySignNibble: Boolean): Array[Byte] = { + if (precision < 1) + throw new IllegalArgumentException(s"Invalid BCD precision=$precision, should be greater than zero.") + + val totalDigits = if (mandatorySignNibble) { + if (precision % 2 == 0) precision + 2 else precision + 1 + } else { + if (precision % 2 == 0) precision else precision + 1 + } + + val byteCount = totalDigits / 2 + val bytes = new Array[Byte](byteCount) + + if (number == null) { + return bytes + } + + val shift = scaleFactor - scale + val shifted = if (shift == 0) number else number.movePointLeft(shift) + + val isNegative = number.signum() < 0 + val digitsOnly = shifted.abs().setScale(0, RoundingMode.HALF_DOWN).toPlainString + + if (isNegative && (!signed || !mandatorySignNibble)) { + return bytes + } + + if (digitsOnly.length > precision || scale < 0) + return bytes + + val signNibble: Byte = if (signed) { + if (isNegative) 0x0D else 0x0C + } else { + 0x0F + } + + val padded = if (mandatorySignNibble) { + if (digitsOnly.length == totalDigits - 1) + digitsOnly + "0" + else + "0"*(totalDigits - digitsOnly.length - 1) + digitsOnly + "0" + } else { + if (digitsOnly.length == totalDigits) + digitsOnly + else + "0"*(totalDigits - digitsOnly.length) + digitsOnly + } + + var bi = 0 + + while (bi < byteCount) { + val high = padded.charAt(bi * 2).asDigit + val low = padded.charAt(bi * 2 + 1).asDigit + + bytes(bi) = ((high << 4) | low).toByte + bi += 1 + } + + if (mandatorySignNibble) { + bytes(byteCount - 1) = ((bytes(byteCount - 1) & 0xF0) | signNibble).toByte + } + + bytes + } + +} diff --git a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/EncoderSelector.scala b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/EncoderSelector.scala index c4b7c1b9b..ffe0e4b22 100644 --- 
a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/EncoderSelector.scala +++ b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/EncoderSelector.scala @@ -16,7 +16,7 @@ package za.co.absa.cobrix.cobol.parser.encoding -import za.co.absa.cobrix.cobol.parser.ast.datatype.{AlphaNumeric, CobolType} +import za.co.absa.cobrix.cobol.parser.ast.datatype.{AlphaNumeric, COMP3, COMP3U, CobolType, Decimal, Integral} import za.co.absa.cobrix.cobol.parser.encoding.codepage.{CodePage, CodePageCommon} import java.nio.charset.{Charset, StandardCharsets} @@ -31,6 +31,14 @@ object EncoderSelector { dataType match { case alphaNumeric: AlphaNumeric if alphaNumeric.compact.isEmpty => getStringEncoder(alphaNumeric.enc.getOrElse(EBCDIC), ebcdicCodePage, asciiCharset, alphaNumeric.length) + case integralComp3: Integral if integralComp3.compact.exists(_.isInstanceOf[COMP3]) => + Option(getBdcEncoder(integralComp3.precision, 0, 0, integralComp3.signPosition.isDefined, mandatorySignNibble = true)) + case integralComp3: Integral if integralComp3.compact.exists(_.isInstanceOf[COMP3U]) => + Option(getBdcEncoder(integralComp3.precision, 0, 0, integralComp3.signPosition.isDefined, mandatorySignNibble = false)) + case decimalComp3: Decimal if decimalComp3.compact.exists(_.isInstanceOf[COMP3]) => + Option(getBdcEncoder(decimalComp3.precision, decimalComp3.scale, decimalComp3.scaleFactor, decimalComp3.signPosition.isDefined, mandatorySignNibble = true)) + case decimalComp3: Decimal if decimalComp3.compact.exists(_.isInstanceOf[COMP3U]) => + Option(getBdcEncoder(decimalComp3.precision, decimalComp3.scale, decimalComp3.scaleFactor, decimalComp3.signPosition.isDefined, mandatorySignNibble = false)) case _ => None } @@ -80,4 +88,26 @@ object EncoderSelector { buf } + def getBdcEncoder(precision: Int, + scale: Int, + scaleFactor: Int, + signed: Boolean, + mandatorySignNibble: Boolean): Encoder = { + if (signed && !mandatorySignNibble) + throw new 
IllegalArgumentException("If signed is true, mandatorySignNibble must also be true.") + + (a: Any) => { + val number = a match { + case null => null + case d: java.math.BigDecimal => d + case n: java.math.BigInteger => new java.math.BigDecimal(n) + case n: Byte => new java.math.BigDecimal(n) + case n: Int => new java.math.BigDecimal(n) + case n: Long => new java.math.BigDecimal(n) + case x => new java.math.BigDecimal(x.toString) + } + BCDNumberEncoders.encodeBCDNumber(number, precision, scale, scaleFactor, signed, mandatorySignNibble) + } + } + } diff --git a/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/parser/encoding/BCDNumberEncodersSuite.scala b/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/parser/encoding/BCDNumberEncodersSuite.scala new file mode 100644 index 000000000..611fbee6c --- /dev/null +++ b/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/parser/encoding/BCDNumberEncodersSuite.scala @@ -0,0 +1,226 @@ +/* + * Copyright 2018 ABSA Group Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package za.co.absa.cobrix.cobol.parser.encoding + +import org.scalatest.Assertion +import org.scalatest.wordspec.AnyWordSpec + +class BCDNumberEncodersSuite extends AnyWordSpec { + "encodeBCDNumber" should { + "integral number" when { + "encode a number" in { + val expected = Array[Byte](0x12, 0x34, 0x5C) + val actual = BCDNumberEncoders.encodeBCDNumber(new java.math.BigDecimal(12345), 5, 0, 0, signed = true, mandatorySignNibble = true) + + checkExpected(actual, expected) + } + + "encode a number with an even precision" in { + val expected = Array[Byte](0x01, 0x23, 0x4C) + val actual = BCDNumberEncoders.encodeBCDNumber(new java.math.BigDecimal(1234), 4, 0, 0, signed = true, mandatorySignNibble = true) + + checkExpected(actual, expected) + } + + "encode a small number" in { + val expected = Array[Byte](0x00, 0x00, 0x5C) + val actual = BCDNumberEncoders.encodeBCDNumber(new java.math.BigDecimal(5), 5, 0, 0, signed = true, mandatorySignNibble = true) + + checkExpected(actual, expected) + } + + "encode an unsigned number" in { + val expected = Array[Byte](0x12, 0x34, 0x5F) + val actual = BCDNumberEncoders.encodeBCDNumber(new java.math.BigDecimal(12345), 5, 0, 0, signed = false, mandatorySignNibble = true) + + checkExpected(actual, expected) + } + + "encode a negative number" in { + val expected = Array[Byte](0x12, 0x34, 0x5D) + val actual = BCDNumberEncoders.encodeBCDNumber(new java.math.BigDecimal(-12345), 5, 0, 0, signed = true, mandatorySignNibble = true) + + checkExpected(actual, expected) + } + + "encode a small negative number" in { + val expected = Array[Byte](0x00, 0x00, 0x7D) + val actual = BCDNumberEncoders.encodeBCDNumber(new java.math.BigDecimal(-7), 4, 0, 0, signed = true, mandatorySignNibble = true) + + checkExpected(actual, expected) + } + + "encode a number without sign nibble" in { + val expected = Array[Byte](0x01, 0x23, 0x45) + val actual = BCDNumberEncoders.encodeBCDNumber(new java.math.BigDecimal(12345), 5, 0, 0, signed = false, 
mandatorySignNibble = false) + + checkExpected(actual, expected) + } + + "encode a number without sign nibble with an even precision" in { + val expected = Array[Byte](0x12, 0x34) + val actual = BCDNumberEncoders.encodeBCDNumber(new java.math.BigDecimal(1234), 4, 0, 0, signed = true, mandatorySignNibble = false) + + checkExpected(actual, expected) + } + + "encode a too big number" in { + val expected = Array[Byte](0x00, 0x00, 0x00) + val actual = BCDNumberEncoders.encodeBCDNumber(new java.math.BigDecimal(123456), 5, 0, 0, signed = false, mandatorySignNibble = false) + + checkExpected(actual, expected) + } + + "encode a too big negative number" in { + val expected = Array[Byte](0x00, 0x00, 0x00) + val actual = BCDNumberEncoders.encodeBCDNumber(new java.math.BigDecimal(-123456), 5, 0, 0, signed = true, mandatorySignNibble = true) + + checkExpected(actual, expected) + } + + "encode a number with negative scale" in { + val expected = Array[Byte](0x00, 0x00, 0x00) + val actual = BCDNumberEncoders.encodeBCDNumber(new java.math.BigDecimal(12345), 5, -1, 0, signed = false, mandatorySignNibble = false) + + checkExpected(actual, expected) + } + + "attempt to encode a negative number without sign nibble" in { + val expected = Array[Byte](0x00, 0x00, 0x00) + val actual = BCDNumberEncoders.encodeBCDNumber(new java.math.BigDecimal(-12345), 5, 0, 0, signed = false, mandatorySignNibble = false) + + checkExpected(actual, expected) + } + + "attempt to encode a signed number without a sign nibble" in { + val expected = Array[Byte](0x00, 0x00, 0x00) + val actual = BCDNumberEncoders.encodeBCDNumber(new java.math.BigDecimal(-12345), 5, 0, 0, signed = true, mandatorySignNibble = false) + + checkExpected(actual, expected) + } + + "attempt to encode a number with an incorrect precision" in { + val expected = Array[Byte](0x00, 0x00) + val actual = BCDNumberEncoders.encodeBCDNumber(new java.math.BigDecimal(12345), 4, 0, 0, signed = false, mandatorySignNibble = false) + + 
checkExpected(actual, expected) + } + + "attempt to encode a number with an incorrect precision with sign nibble" in { + val expected = Array[Byte](0x00, 0x00, 0x00) + val actual = BCDNumberEncoders.encodeBCDNumber(new java.math.BigDecimal(12345), 4, 0, 0, signed = true, mandatorySignNibble = true) + + checkExpected(actual, expected) + } + + "attempt to encode a number with zero precision" in { + assertThrows[IllegalArgumentException](BCDNumberEncoders.encodeBCDNumber(new java.math.BigDecimal(12345), 0, 0, 0, signed = true, mandatorySignNibble = true)) + } + } + + "decimal number" when { + "encode a number" in { + val expected = Array[Byte](0x12, 0x34, 0x5C) + val actual = BCDNumberEncoders.encodeBCDNumber(new java.math.BigDecimal(123.45), 5, 2, 0, signed = true, mandatorySignNibble = true) + + checkExpected(actual, expected) + } + + "encode a small number" in { + val expected = Array[Byte](0x00, 0x00, 0x5C) + val actual = BCDNumberEncoders.encodeBCDNumber(new java.math.BigDecimal(0.05), 5, 2, 0, signed = true, mandatorySignNibble = true) + + checkExpected(actual, expected) + } + + "encode an unsigned number" in { + val expected = Array[Byte](0x12, 0x34, 0x5F) + val actual = BCDNumberEncoders.encodeBCDNumber(new java.math.BigDecimal(1234.5), 5, 1, 0, signed = false, mandatorySignNibble = true) + + checkExpected(actual, expected) + } + + "encode a negative number" in { + val expected = Array[Byte](0x12, 0x34, 0x5D) + val actual = BCDNumberEncoders.encodeBCDNumber(new java.math.BigDecimal(-12.345), 5, 3, 0, signed = true, mandatorySignNibble = true) + + checkExpected(actual, expected) + } + + "encode a small negative number" in { + val expected = Array[Byte](0x00, 0x00, 0x7D) + val actual = BCDNumberEncoders.encodeBCDNumber(new java.math.BigDecimal(-0.00007), 4, 5, 0, signed = true, mandatorySignNibble = true) + + checkExpected(actual, expected) + } + + "encode a number without sign nibble" in { + val expected = Array[Byte](0x01, 0x23, 0x45) + val actual = 
BCDNumberEncoders.encodeBCDNumber(new java.math.BigDecimal(123.45), 5, 2, 0, signed = false, mandatorySignNibble = false) + + checkExpected(actual, expected) + } + + "encode a too precise number" in { + val expected = Array[Byte](0x01, 0x23, 0x46) + val actual = BCDNumberEncoders.encodeBCDNumber(new java.math.BigDecimal(123.456), 5, 2, 0, signed = false, mandatorySignNibble = false) + + checkExpected(actual, expected) + } + + "encode a too big number" in { + val expected = Array[Byte](0x00, 0x00, 0x00) + val actual = BCDNumberEncoders.encodeBCDNumber(new java.math.BigDecimal(1234.56), 5, 2, 0, signed = false, mandatorySignNibble = false) + + checkExpected(actual, expected) + } + + "encode a too big negative number" in { + val expected = Array[Byte](0x00, 0x00, 0x00) + val actual = BCDNumberEncoders.encodeBCDNumber(new java.math.BigDecimal(-1234.56), 5, 2, 0, signed = true, mandatorySignNibble = true) + + checkExpected(actual, expected) + } + + "encode a number with positive scale factor" in { + val expected = Array[Byte](0x00, 0x12, 0x3F) + val actual = BCDNumberEncoders.encodeBCDNumber(new java.math.BigDecimal(12300), 5, 0, 2, signed = false, mandatorySignNibble = true) + + checkExpected(actual, expected) + } + + "encode a number with negative scale factor" in { + val expected = Array[Byte](0x00, 0x12, 0x3F) + val actual = BCDNumberEncoders.encodeBCDNumber(new java.math.BigDecimal(1.23), 5, 0, -2, signed = false, mandatorySignNibble = true) + + checkExpected(actual, expected) + } + } + } + + def checkExpected(actual: Array[Byte], expected: Array[Byte]): Assertion = { + if (!actual.sameElements(expected)) { + val actualHex = actual.map(b => f"$b%02X").mkString(" ") + val expectedHex = expected.map(b => f"$b%02X").mkString(" ") + fail(s"Actual: $actualHex\nExpected: $expectedHex") + } else { + succeed + } + } + + +} diff --git a/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/parser/extract/BinaryExtractorSpec.scala 
b/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/parser/extract/BinaryExtractorSpec.scala index 30a313990..72ad1252a 100644 --- a/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/parser/extract/BinaryExtractorSpec.scala +++ b/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/parser/extract/BinaryExtractorSpec.scala @@ -216,7 +216,7 @@ class BinaryExtractorSpec extends AnyFunSuite { val fieldName2: String = "COMPANY.COMPANY-ID-NUM" val fields2 = copybook.getFieldByName(fieldName2) assert(fields2.isInstanceOf[Primitive]) - assert(fields2.asInstanceOf[Primitive].encode.isEmpty) + assert(fields2.asInstanceOf[Primitive].encode.nonEmpty) } test("Test padding when setting field value by name") { @@ -230,7 +230,7 @@ class BinaryExtractorSpec extends AnyFunSuite { val fieldName2: String = "COMPANY.COMPANY-ID-NUM" val fields2 = copybook2.getFieldByName(fieldName2) assert(fields2.isInstanceOf[Primitive]) - assert(fields2.asInstanceOf[Primitive].encode.isEmpty) + assert(fields2.asInstanceOf[Primitive].encode.nonEmpty) } test("Test truncating when setting field value by name") { @@ -244,6 +244,6 @@ class BinaryExtractorSpec extends AnyFunSuite { val fieldName2: String = "COMPANY.COMPANY-ID-NUM" val fields2 = copybook2.getFieldByName(fieldName2) assert(fields2.isInstanceOf[Primitive]) - assert(fields2.asInstanceOf[Primitive].encode.isEmpty) + assert(fields2.asInstanceOf[Primitive].encode.nonEmpty) } } diff --git a/spark-cobol/src/main/scala/za/co/absa/cobrix/spark/cobol/writer/BasicRecordCombiner.scala b/spark-cobol/src/main/scala/za/co/absa/cobrix/spark/cobol/writer/BasicRecordCombiner.scala index 8b9329286..56fb1c5d8 100644 --- a/spark-cobol/src/main/scala/za/co/absa/cobrix/spark/cobol/writer/BasicRecordCombiner.scala +++ b/spark-cobol/src/main/scala/za/co/absa/cobrix/spark/cobol/writer/BasicRecordCombiner.scala @@ -18,11 +18,16 @@ package za.co.absa.cobrix.spark.cobol.writer import org.apache.spark.rdd.RDD import org.apache.spark.sql.DataFrame +import 
za.co.absa.cobrix.cobol.parser.Copybook +import za.co.absa.cobrix.cobol.parser.ast.datatype.{Decimal, Integral} import za.co.absa.cobrix.cobol.parser.ast.{Group, Primitive, Statement} import za.co.absa.cobrix.cobol.reader.parameters.ReaderParameters import za.co.absa.cobrix.cobol.reader.schema.CobolSchema class BasicRecordCombiner extends RecordCombiner { + + import BasicRecordCombiner._ + override def combine(df: DataFrame, cobolSchema: CobolSchema, readerParameters: ReaderParameters): RDD[Array[Byte]] = { val ast = getAst(cobolSchema) val copybookFields = ast.children.filter { @@ -38,7 +43,9 @@ class BasicRecordCombiner extends RecordCombiner { cobolFields.foreach(cobolField => if (cobolField.encode.isEmpty) { - throw new IllegalArgumentException(s"Field '${cobolField.name}' does not have an encoding defined in the copybook. 'PIC ${cobolField.dataType.originalPic}' is not yet supported.") + val fieldDefinition = getFieldDefinition(cobolField) + throw new IllegalArgumentException(s"Field '${cobolField.name}' does not have an encoding defined in the copybook. 
" + + s"'PIC $fieldDefinition' is not yet supported.") } ) @@ -62,7 +69,7 @@ class BasicRecordCombiner extends RecordCombiner { if (!row.isNullAt(sparkIdx)) { val fieldStr = row.get(sparkIdx) val cobolField = cobolFields(cobolIdx) - cobolSchema.copybook.setPrimitiveField(cobolField, ar, fieldStr, 0) + Copybook.setPrimitiveField(cobolField, ar, fieldStr, 0) } } @@ -107,3 +114,17 @@ class BasicRecordCombiner extends RecordCombiner { } } } + +object BasicRecordCombiner { + def getFieldDefinition(field: Primitive): String = { + val pic = field.dataType.originalPic.getOrElse(field.dataType.pic) + + val usage = field.dataType match { + case dt: Integral => dt.compact.map(_.toString).getOrElse("USAGE IS DISPLAY") + case dt: Decimal => dt.compact.map(_.toString).getOrElse("USAGE IS DISPLAY") + case _ => "" + } + + s"$pic $usage".trim + } +} diff --git a/spark-cobol/src/test/scala/za/co/absa/cobrix/spark/cobol/writer/FixedLengthEbcdicWriterSuite.scala b/spark-cobol/src/test/scala/za/co/absa/cobrix/spark/cobol/writer/FixedLengthEbcdicWriterSuite.scala index 54adb4e5e..5367d9279 100644 --- a/spark-cobol/src/test/scala/za/co/absa/cobrix/spark/cobol/writer/FixedLengthEbcdicWriterSuite.scala +++ b/spark-cobol/src/test/scala/za/co/absa/cobrix/spark/cobol/writer/FixedLengthEbcdicWriterSuite.scala @@ -126,6 +126,64 @@ class FixedLengthEbcdicWriterSuite extends AnyWordSpec with SparkTestBase with B } } + "write data frames with COMP-3 fields" in { + withTempDirectory("cobol_writer1") { tempDir => + val df = List( + (1, 100.5, new java.math.BigDecimal(10.23), 1, 100.5, new java.math.BigDecimal(10.12)), + (2, 800.4, new java.math.BigDecimal(30), 2, 800.4, new java.math.BigDecimal(30)), + (3, 22.33, new java.math.BigDecimal(-20), 3, 22.33, new java.math.BigDecimal(-20)) + ).toDF("A", "B", "C", "D", "E", "F") + + val path = new Path(tempDir, "writer1") + + val copybookContentsWithFilers = + """ 01 RECORD. + 05 A PIC S9(1) COMP-3. + 05 B PIC 9(4)V9(2) COMP-3. 
+ 05 C PIC S9(2)V9(2) COMP-3. + 05 D PIC 9(1) COMP-3U. + 05 E PIC 9(4)V9(2) COMP-3U. + 05 F PIC 9(2)V9(2) COMP-3U. + """ + + df.coalesce(1) + .orderBy("A") + .write + .format("cobol") + .mode(SaveMode.Overwrite) + .option("copybook_contents", copybookContentsWithFilers) + .save(path.toString) + + val fs = path.getFileSystem(spark.sparkContext.hadoopConfiguration) + + assert(fs.exists(path), "Output directory should exist") + val files = fs.listStatus(path) + .filter(_.getPath.getName.startsWith("part-")) + assert(files.nonEmpty, "Output directory should contain part files") + + val partFile = files.head.getPath + val data = fs.open(partFile) + val bytes = new Array[Byte](files.head.getLen.toInt) + data.readFully(bytes) + data.close() + + // Expected EBCDIC data for sample test data + val expected = Array( + 0x1C, 0x00, 0x10, 0x05, 0x0F, 0x01, 0x02, 0x3C, 0x01, 0x01, 0x00, 0x50, 0x10, 0x12, + 0x2C, 0x00, 0x80, 0x04, 0x0F, 0x03, 0x00, 0x0C, 0x02, 0x08, 0x00, 0x40, 0x30, 0x00, + 0x3C, 0x00, 0x02, 0x23, 0x3F, 0x02, 0x00, 0x0D, 0x03, 0x00, 0x22, 0x33, 0x00, 0x00 + ).map(_.toByte) + + if (!bytes.sameElements(expected)) { + println(s"Expected bytes: ${expected.map("%02X" format _).mkString(" ")}") + println(s"Actual bytes: ${bytes.map("%02X" format _).mkString(" ")}") + + assert(bytes.sameElements(expected), "Written data should match expected EBCDIC encoding") + } + } + } + + "write should fail with save mode append and the path exists" in { withTempDirectory("cobol_writer3") { tempDir => val df = List(("A", "First"), ("B", "Scnd"), ("C", "Last")).toDF("A", "B")