diff --git a/README.md b/README.md
index 4a0d1f161..e4f7b9de6 100644
--- a/README.md
+++ b/README.md
@@ -1684,11 +1684,13 @@ The writer is still in its early stages and has several limitations:
    05 FIELD_1 PIC X(1).
    05 FIELD_2 PIC X(5).
 ```
-- Only `PIC X(n)` fields are supported; numeric types are not.
+- Supported types:
+  - `PIC X(n)` alphanumeric.
+  - `PIC S9(n)` numeric (integral and decimal) with `COMP`/`COMP-4`/`COMP-5` (big-endian), `COMP-3`, and
+    `COMP-9` (Cobrix little-endian).
 - Only fixed record length output is supported (`record_format = F`).
 - `REDEFINES` and `OCCURS` are not supported.
 - Only the core EBCDIC encoder is supported; specific EBCDIC code pages are not yet available.
-- Save mode `append` is not supported; only `overwrite` is.
 - Partitioning by DataFrame fields is not supported.
 
 ### Implementation details
diff --git a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/decoders/BinaryUtils.scala b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/decoders/BinaryUtils.scala
index 0567fc1a1..52d5dab79 100644
--- a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/decoders/BinaryUtils.scala
+++ b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/decoders/BinaryUtils.scala
@@ -97,10 +97,11 @@ object BinaryUtils {
   def getBytesCount(compression: Option[Usage], precision: Int, isSigned: Boolean, isExplicitDecimalPt: Boolean, isSignSeparate: Boolean): Int = {
     import Constants._
-    val isRealSigned = if (isSignSeparate) false else isSigned
+
     val bytes = compression match {
-      case Some(comp) if comp == COMP4() || comp == COMP5() || comp == COMP9() => // || comp == binary2()
-        // if native binary follow IBM guide to digit binary length
+      case Some(comp) if comp == COMP4() || comp == COMP5() || comp == COMP9() =>
+        // For native binary, follow the IBM guide for mapping the number of decimal digits to the binary size.
+        // COMP-9 is a little-endian Cobrix extension. It also supports 1-byte binary numbers for PICs with 1 or 2 decimal digits.
         precision match {
           case p if p >= 1 && p <= 2 && comp == COMP9() => 1 // byte
           case p if p >= minShortPrecision && p <= maxShortPrecision => binaryShortSizeBytes
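Reviewer note: the `getBytesCount` change above relies on the IBM mapping from the number of decimal digits in a `PIC` to the size of the binary field. A minimal standalone sketch of that mapping; the thresholds are assumptions based on the conventional IBM sizes, not the library's `Constants`:

```scala
// Sketch of the digit-to-binary-size mapping referenced in the comment above,
// assuming the conventional IBM thresholds: 1-4 digits -> 2 bytes (halfword),
// 5-9 -> 4 bytes (fullword), 10-18 -> 8 bytes (doubleword).
// The 1-byte case models the Cobrix COMP-9 extension.
def binarySizeBytes(precision: Int, isComp9: Boolean): Int = precision match {
  case p if p >= 1 && p <= 2 && isComp9 => 1 // COMP-9 only: single byte
  case p if p >= 1 && p <= 4            => 2 // Short
  case p if p >= 5 && p <= 9            => 4 // Int
  case p if p >= 10 && p <= 18          => 8 // Long
  case p => throw new IllegalArgumentException(s"Unsupported binary precision: $p")
}
```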
diff --git a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/BinaryEncoders.scala b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/BinaryEncoders.scala
new file mode 100644
index 000000000..771ffdf80
--- /dev/null
+++ b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/BinaryEncoders.scala
@@ -0,0 +1,76 @@
+/*
+ * Copyright 2018 ABSA Group Limited
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package za.co.absa.cobrix.cobol.parser.encoding
+
+import java.math.RoundingMode
+
+object BinaryEncoders {
+  /**
+    * Encodes a number as a big- or little-endian two's complement binary value of the given size.
+    *
+    * Returns an array of `outputSize` zero bytes if the value is null, a parameter is invalid,
+    * or the value does not fit into the output field.
+    */
+  def encodeBinaryNumber(number: java.math.BigDecimal,
+                         isSigned: Boolean,
+                         outputSize: Int,
+                         bigEndian: Boolean,
+                         precision: Int,
+                         scale: Int,
+                         scaleFactor: Int): Array[Byte] = {
+    val bytes = new Array[Byte](outputSize)
+
+    if (number == null || precision < 1 || scale < 0 || outputSize < 1)
+      return bytes
+
+    val shift = scaleFactor - scale
+    val bigInt = if (shift == 0)
+      number.setScale(0, RoundingMode.HALF_DOWN).toBigIntegerExact
+    else
+      number.movePointLeft(shift).setScale(0, RoundingMode.HALF_DOWN).toBigIntegerExact
+
+    val intValue = bigInt.toByteArray
+    val intValueLen = intValue.length
+
+    if (intValueLen > outputSize || (!isSigned && bigInt.signum() < 0))
+      return bytes
+
+    val paddingByte = if (bigInt.signum() < 0) 0xFF.toByte else 0x00.toByte
+
+    if (bigEndian) {
+      var i = 0
+      while (i < outputSize) {
+        if (i < intValueLen) {
+          bytes(outputSize - i - 1) = intValue(intValueLen - i - 1)
+        } else {
+          bytes(outputSize - i - 1) = paddingByte
+        }
+        i += 1
+      }
+    } else {
+      var i = 0
+      while (i < outputSize) {
+        if (i < intValueLen) {
+          bytes(i) = intValue(intValueLen - i - 1)
+        } else {
+          bytes(i) = paddingByte
+        }
+        i += 1
+      }
+    }
+    bytes
+  }
+}
diff --git a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/EncoderSelector.scala b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/EncoderSelector.scala
index ffe0e4b22..56962c99d 100644
--- a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/EncoderSelector.scala
+++ b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/EncoderSelector.scala
@@ -16,7 +16,8 @@
 
 package za.co.absa.cobrix.cobol.parser.encoding
 
-import za.co.absa.cobrix.cobol.parser.ast.datatype.{AlphaNumeric, COMP3, COMP3U, CobolType, Decimal, Integral}
+import za.co.absa.cobrix.cobol.parser.ast.datatype.{AlphaNumeric, COMP3, COMP3U, COMP4, COMP9, CobolType, Decimal, Integral, Usage}
+import za.co.absa.cobrix.cobol.parser.decoders.BinaryUtils
 import za.co.absa.cobrix.cobol.parser.encoding.codepage.{CodePage, CodePageCommon}
 
 import java.nio.charset.{Charset, StandardCharsets}
@@ -29,16 +30,24 @@ object EncoderSelector {
                 ebcdicCodePage: CodePage = new CodePageCommon,
                 asciiCharset: Charset = StandardCharsets.US_ASCII): Option[Encoder] = {
     dataType match {
-      case alphaNumeric: AlphaNumeric if alphaNumeric.compact.isEmpty => 
+      case alphaNumeric: AlphaNumeric if alphaNumeric.compact.isEmpty =>
         getStringEncoder(alphaNumeric.enc.getOrElse(EBCDIC), ebcdicCodePage, asciiCharset, alphaNumeric.length)
-      case integralComp3: Integral if integralComp3.compact.exists(_.isInstanceOf[COMP3]) => 
+      case integralComp3: Integral if integralComp3.compact.exists(_.isInstanceOf[COMP3]) =>
         Option(getBdcEncoder(integralComp3.precision, 0, 0, integralComp3.signPosition.isDefined, mandatorySignNibble = true))
-      case integralComp3: Integral if integralComp3.compact.exists(_.isInstanceOf[COMP3U]) => 
+      case integralComp3: Integral if integralComp3.compact.exists(_.isInstanceOf[COMP3U]) =>
         Option(getBdcEncoder(integralComp3.precision, 0, 0, integralComp3.signPosition.isDefined, mandatorySignNibble = false))
-      case decimalComp3: Decimal if decimalComp3.compact.exists(_.isInstanceOf[COMP3]) => 
+      case decimalComp3: Decimal if decimalComp3.compact.exists(_.isInstanceOf[COMP3]) =>
         Option(getBdcEncoder(decimalComp3.precision, decimalComp3.scale, decimalComp3.scaleFactor, decimalComp3.signPosition.isDefined, mandatorySignNibble = true))
-      case decimalComp3: Decimal if decimalComp3.compact.exists(_.isInstanceOf[COMP3U]) => 
+      case decimalComp3: Decimal if decimalComp3.compact.exists(_.isInstanceOf[COMP3U]) =>
         Option(getBdcEncoder(decimalComp3.precision, decimalComp3.scale, decimalComp3.scaleFactor, decimalComp3.signPosition.isDefined, mandatorySignNibble = false))
+      case integralBinary: Integral if integralBinary.compact.exists(_.isInstanceOf[COMP4]) =>
+        Option(getBinaryEncoder(integralBinary.compact, integralBinary.precision, 0, 0, integralBinary.signPosition.isDefined, isBigEndian = true))
+      case integralBinary: Integral if integralBinary.compact.exists(_.isInstanceOf[COMP9]) =>
+        Option(getBinaryEncoder(integralBinary.compact, integralBinary.precision, 0, 0, integralBinary.signPosition.isDefined, isBigEndian = false))
+      case decimalBinary: Decimal if decimalBinary.compact.exists(_.isInstanceOf[COMP4]) =>
+        Option(getBinaryEncoder(decimalBinary.compact, decimalBinary.precision, decimalBinary.scale, decimalBinary.scaleFactor, decimalBinary.signPosition.isDefined, isBigEndian = true))
+      case decimalBinary: Decimal if decimalBinary.compact.exists(_.isInstanceOf[COMP9]) =>
+        Option(getBinaryEncoder(decimalBinary.compact, decimalBinary.precision, decimalBinary.scale, decimalBinary.scaleFactor, decimalBinary.signPosition.isDefined, isBigEndian = false))
       case _ =>
         None
     }
@@ -88,6 +97,27 @@ object EncoderSelector {
     buf
   }
 
+  def getBinaryEncoder(compression: Option[Usage],
+                       precision: Int,
+                       scale: Int,
+                       scaleFactor: Int,
+                       isSigned: Boolean,
+                       isBigEndian: Boolean): Encoder = {
+    val numBytes = BinaryUtils.getBytesCount(compression, precision, isSigned, isExplicitDecimalPt = false, isSignSeparate = false)
+    (a: Any) => {
+      val number = a match {
+        case null => null
+        case d: java.math.BigDecimal => d
+        case n: java.math.BigInteger => new java.math.BigDecimal(n)
+        case n: Byte => new java.math.BigDecimal(n)
+        case n: Int => new java.math.BigDecimal(n)
+        case n: Long => new java.math.BigDecimal(n)
+        case x => new java.math.BigDecimal(x.toString)
+      }
+      BinaryEncoders.encodeBinaryNumber(number, isSigned, numBytes, isBigEndian, precision, scale, scaleFactor)
+    }
+  }
+
   def getBdcEncoder(precision: Int,
                     scale: Int,
                     scaleFactor: Int,
diff --git a/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/parser/encoding/BCDNumberEncodersSuite.scala b/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/parser/encoding/BCDNumberEncodersSuite.scala
index 611fbee6c..e3331d489 100644
--- a/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/parser/encoding/BCDNumberEncodersSuite.scala
+++ b/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/parser/encoding/BCDNumberEncodersSuite.scala
@@ -16,8 +16,8 @@
 
 package za.co.absa.cobrix.cobol.parser.encoding
 
-import org.scalatest.Assertion
 import org.scalatest.wordspec.AnyWordSpec
+import za.co.absa.cobrix.cobol.testutils.ComparisonUtils._
 
 class BCDNumberEncodersSuite extends AnyWordSpec {
   "encodeBCDNumber" should {
@@ -26,108 +26,108 @@ class BCDNumberEncodersSuite extends AnyWordSpec {
     "integral number" when {
       "encode a number" in {
         val expected = Array[Byte](0x12, 0x34, 0x5C)
         val actual = BCDNumberEncoders.encodeBCDNumber(new java.math.BigDecimal(12345), 5, 0, 0, signed = true, mandatorySignNibble = true)
 
-        checkExpected(actual, expected)
+        assertArraysEqual(actual, expected)
       }
 
       "encode a number with an even precision" in {
         val expected = Array[Byte](0x01, 0x23, 0x4C)
         val actual = BCDNumberEncoders.encodeBCDNumber(new java.math.BigDecimal(1234), 4, 0, 0, signed = true, mandatorySignNibble = true)
 
-        checkExpected(actual, expected)
+        assertArraysEqual(actual, expected)
       }
 
      "encode a small number" in {
         val expected = Array[Byte](0x00, 0x00, 0x5C)
         val actual = BCDNumberEncoders.encodeBCDNumber(new java.math.BigDecimal(5), 5, 0, 0, signed = true, mandatorySignNibble = true)
 
-        checkExpected(actual, expected)
+        assertArraysEqual(actual, expected)
      }
 
      "encode an unsigned number" in {
         val expected = Array[Byte](0x12, 0x34, 0x5F)
         val actual = BCDNumberEncoders.encodeBCDNumber(new java.math.BigDecimal(12345), 5, 0, 0, signed = false, mandatorySignNibble = true)
 
-        checkExpected(actual, expected)
+        assertArraysEqual(actual, expected)
      }
 
      "encode a negative number" in {
         val expected = Array[Byte](0x12, 0x34, 0x5D)
         val actual = BCDNumberEncoders.encodeBCDNumber(new java.math.BigDecimal(-12345), 5, 0, 0, signed = true, mandatorySignNibble = true)
 
-        checkExpected(actual, expected)
+        assertArraysEqual(actual, expected)
      }
 
      "encode a small negative number" in {
         val expected = Array[Byte](0x00, 0x00, 0x7D)
         val actual = BCDNumberEncoders.encodeBCDNumber(new java.math.BigDecimal(-7), 4, 0, 0, signed = true, mandatorySignNibble = true)
 
-        checkExpected(actual, expected)
+        assertArraysEqual(actual, expected)
      }
 
      "encode a number without sign nibble" in {
         val expected = Array[Byte](0x01, 0x23, 0x45)
         val actual = BCDNumberEncoders.encodeBCDNumber(new java.math.BigDecimal(12345), 5, 0, 0, signed = false, mandatorySignNibble = false)
 
-        checkExpected(actual, expected)
+        assertArraysEqual(actual, expected)
      }
 
      "encode a number without sign nibble with an even precision" in {
         val expected = Array[Byte](0x12, 0x34)
         val actual = BCDNumberEncoders.encodeBCDNumber(new java.math.BigDecimal(1234), 4, 0, 0, signed = true, mandatorySignNibble = false)
 
-        checkExpected(actual, expected)
+        assertArraysEqual(actual, expected)
      }
 
      "encode a too big number" in {
         val expected = Array[Byte](0x00, 0x00, 0x00)
         val actual = BCDNumberEncoders.encodeBCDNumber(new java.math.BigDecimal(123456), 5, 0, 0, signed = false, mandatorySignNibble = false)
 
-        checkExpected(actual, expected)
+        assertArraysEqual(actual, expected)
      }
 
      "encode a too big negative number" in {
         val expected = Array[Byte](0x00, 0x00, 0x00)
         val actual = BCDNumberEncoders.encodeBCDNumber(new java.math.BigDecimal(-123456), 5, 0, 0, signed = true, mandatorySignNibble = true)
 
-        checkExpected(actual, expected)
+        assertArraysEqual(actual, expected)
      }
 
-     "encode a number with nbegative scale" in {
+     "encode a number with negative scale" in {
         val expected = Array[Byte](0x00, 0x00, 0x00)
         val actual = BCDNumberEncoders.encodeBCDNumber(new java.math.BigDecimal(12345), 5, -1, 0, signed = false, mandatorySignNibble = false)
 
-        checkExpected(actual, expected)
+        assertArraysEqual(actual, expected)
      }
 
      "attempt to encode a negative number without sign nibble" in {
         val expected = Array[Byte](0x00, 0x00, 0x00)
         val actual = BCDNumberEncoders.encodeBCDNumber(new java.math.BigDecimal(-12345), 5, 0, 0, signed = false, mandatorySignNibble = false)
 
-        checkExpected(actual, expected)
+        assertArraysEqual(actual, expected)
      }
 
      "attempt to encode a signed number without a sign nibble" in {
         val expected = Array[Byte](0x00, 0x00, 0x00)
         val actual = BCDNumberEncoders.encodeBCDNumber(new java.math.BigDecimal(-12345), 5, 0, 0, signed = true, mandatorySignNibble = false)
 
-        checkExpected(actual, expected)
+        assertArraysEqual(actual, expected)
      }
 
      "attempt to encode a number with an incorrect precision" in {
         val expected = Array[Byte](0x00, 0x00)
         val actual = BCDNumberEncoders.encodeBCDNumber(new java.math.BigDecimal(12345), 4, 0, 0, signed = false, mandatorySignNibble = false)
 
-        checkExpected(actual, expected)
+        assertArraysEqual(actual, expected)
      }
 
      "attempt to encode a number with an incorrect precision with sign nibble" in {
         val expected = Array[Byte](0x00, 0x00, 0x00)
         val actual = BCDNumberEncoders.encodeBCDNumber(new java.math.BigDecimal(12345), 4, 0, 0, signed = true, mandatorySignNibble = true)
 
-        checkExpected(actual, expected)
+        assertArraysEqual(actual, expected)
      }
 
-     "attempt to encode a number with zero prexision" in {
+     "attempt to encode a number with zero precision" in {
        assertThrows[IllegalArgumentException](BCDNumberEncoders.encodeBCDNumber(new java.math.BigDecimal(12345), 0, 0, 0, signed = true, mandatorySignNibble = true))
      }
    }
@@ -135,92 +135,80 @@ class BCDNumberEncodersSuite extends AnyWordSpec {
     "decimal number" when {
       "encode a number" in {
         val expected = Array[Byte](0x12, 0x34, 0x5C)
-        val actual = BCDNumberEncoders.encodeBCDNumber(new java.math.BigDecimal(123.45), 5, 2, 0, signed = true, mandatorySignNibble = true)
+        val actual = BCDNumberEncoders.encodeBCDNumber(java.math.BigDecimal.valueOf(123.45), 5, 2, 0, signed = true, mandatorySignNibble = true)
 
-        checkExpected(actual, expected)
+        assertArraysEqual(actual, expected)
      }
 
      "encode a small number" in {
         val expected = Array[Byte](0x00, 0x00, 0x5C)
-        val actual = BCDNumberEncoders.encodeBCDNumber(new java.math.BigDecimal(0.05), 5, 2, 0, signed = true, mandatorySignNibble = true)
+        val actual = BCDNumberEncoders.encodeBCDNumber(java.math.BigDecimal.valueOf(0.05), 5, 2, 0, signed = true, mandatorySignNibble = true)
 
-        checkExpected(actual, expected)
+        assertArraysEqual(actual, expected)
      }
 
      "encode an unsigned number" in {
         val expected = Array[Byte](0x12, 0x34, 0x5F)
-        val actual = BCDNumberEncoders.encodeBCDNumber(new java.math.BigDecimal(1234.5), 5, 1, 0, signed = false, mandatorySignNibble = true)
+        val actual = BCDNumberEncoders.encodeBCDNumber(java.math.BigDecimal.valueOf(1234.5), 5, 1, 0, signed = false, mandatorySignNibble = true)
 
-        checkExpected(actual, expected)
+        assertArraysEqual(actual, expected)
      }
 
      "encode a negative number" in {
         val expected = Array[Byte](0x12, 0x34, 0x5D)
-        val actual = BCDNumberEncoders.encodeBCDNumber(new java.math.BigDecimal(-12.345), 5, 3, 0, signed = true, mandatorySignNibble = true)
+        val actual = BCDNumberEncoders.encodeBCDNumber(java.math.BigDecimal.valueOf(-12.345), 5, 3, 0, signed = true, mandatorySignNibble = true)
 
-        checkExpected(actual, expected)
+        assertArraysEqual(actual, expected)
      }
 
      "encode a small negative number" in {
         val expected = Array[Byte](0x00, 0x00, 0x7D)
-        val actual = BCDNumberEncoders.encodeBCDNumber(new java.math.BigDecimal(-0.00007), 4, 5, 0, signed = true, mandatorySignNibble = true)
+        val actual = BCDNumberEncoders.encodeBCDNumber(java.math.BigDecimal.valueOf(-0.00007), 4, 5, 0, signed = true, mandatorySignNibble = true)
 
-        checkExpected(actual, expected)
+        assertArraysEqual(actual, expected)
      }
 
      "encode a number without sign nibble" in {
         val expected = Array[Byte](0x01, 0x23, 0x45)
-        val actual = BCDNumberEncoders.encodeBCDNumber(new java.math.BigDecimal(123.45), 5, 2, 0, signed = false, mandatorySignNibble = false)
+        val actual = BCDNumberEncoders.encodeBCDNumber(java.math.BigDecimal.valueOf(123.45), 5, 2, 0, signed = false, mandatorySignNibble = false)
 
-        checkExpected(actual, expected)
+        assertArraysEqual(actual, expected)
      }
 
      "encode a too precise number" in {
         val expected = Array[Byte](0x01, 0x23, 0x46)
-        val actual = BCDNumberEncoders.encodeBCDNumber(new java.math.BigDecimal(123.456), 5, 2, 0, signed = false, mandatorySignNibble = false)
+        val actual = BCDNumberEncoders.encodeBCDNumber(java.math.BigDecimal.valueOf(123.456), 5, 2, 0, signed = false, mandatorySignNibble = false)
 
-        checkExpected(actual, expected)
+        assertArraysEqual(actual, expected)
      }
 
      "encode a too big number" in {
         val expected = Array[Byte](0x00, 0x00, 0x00)
-        val actual = BCDNumberEncoders.encodeBCDNumber(new java.math.BigDecimal(1234.56), 5, 2, 0, signed = false, mandatorySignNibble = false)
+        val actual = BCDNumberEncoders.encodeBCDNumber(java.math.BigDecimal.valueOf(1234.56), 5, 2, 0, signed = false, mandatorySignNibble = false)
 
-        checkExpected(actual, expected)
+        assertArraysEqual(actual, expected)
      }
 
      "encode a too big negative number" in {
         val expected = Array[Byte](0x00, 0x00, 0x00)
-        val actual = BCDNumberEncoders.encodeBCDNumber(new java.math.BigDecimal(-1234.56), 5, 2, 0, signed = true, mandatorySignNibble = true)
+        val actual = BCDNumberEncoders.encodeBCDNumber(java.math.BigDecimal.valueOf(-1234.56), 5, 2, 0, signed = true, mandatorySignNibble = true)
 
-        checkExpected(actual, expected)
+        assertArraysEqual(actual, expected)
      }
 
      "encode a number with positive scale factor" in {
         val expected = Array[Byte](0x00, 0x12, 0x3F)
-        val actual = BCDNumberEncoders.encodeBCDNumber(new java.math.BigDecimal(12300), 5, 0, 2, signed = false, mandatorySignNibble = true)
+        val actual = BCDNumberEncoders.encodeBCDNumber(java.math.BigDecimal.valueOf(12300), 5, 0, 2, signed = false, mandatorySignNibble = true)
 
-        checkExpected(actual, expected)
+        assertArraysEqual(actual, expected)
      }
 
      "encode a number with negative scale factor" in {
         val expected = Array[Byte](0x00, 0x12, 0x3F)
-        val actual = BCDNumberEncoders.encodeBCDNumber(new java.math.BigDecimal(1.23), 5, 0, -2, signed = false, mandatorySignNibble = true)
+        val actual = BCDNumberEncoders.encodeBCDNumber(java.math.BigDecimal.valueOf(1.23), 5, 0, -2, signed = false, mandatorySignNibble = true)
 
-        checkExpected(actual, expected)
+        assertArraysEqual(actual, expected)
      }
    }
  }
-
-  def checkExpected(actual: Array[Byte], expected: Array[Byte]): Assertion = {
-    if (!actual.sameElements(expected)) {
-      val actualHex = actual.map(b => f"$b%02X").mkString(" ")
-      val expectedHex = expected.map(b => f"$b%02X").mkString(" ")
-      fail(s"Actual: $actualHex\nExpected: $expectedHex")
-    } else {
-      succeed
-    }
-  }
-
-
 }
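Reviewer note: the expected arrays in the new `BinaryEncodersSuite` below follow directly from two's complement encoding. A standalone sketch of the padding-and-reversal core of `encodeBinaryNumber` (the function name is illustrative; the real method also validates precision and rounds scaled decimals, returning zero bytes instead of failing when the value does not fit):

```scala
// BigInt#toByteArray returns the minimal big-endian two's complement representation.
// The encoder pads it to the field width (0x00 for non-negative, 0xFF for negative
// values) and reverses the byte order for little-endian (COMP-9) fields.
def toFixedWidth(value: BigInt, outputSize: Int, bigEndian: Boolean): Array[Byte] = {
  val minimal = value.toByteArray // e.g. BigInt(-12345) -> Array(0xCF, 0xC7)
  require(minimal.length <= outputSize, "value does not fit the field")
  val pad = if (value < 0) 0xFF.toByte else 0x00.toByte
  val padded = Array.fill[Byte](outputSize - minimal.length)(pad) ++ minimal
  if (bigEndian) padded else padded.reverse
}

// toFixedWidth(BigInt(-12345), 4, bigEndian = true)  sameElements Array(0xFF, 0xFF, 0xCF, 0xC7).map(_.toByte)
// toFixedWidth(BigInt(-12345), 4, bigEndian = false) sameElements Array(0xC7, 0xCF, 0xFF, 0xFF).map(_.toByte)
```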
diff --git a/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/parser/encoding/BinaryEncodersSuite.scala b/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/parser/encoding/BinaryEncodersSuite.scala
new file mode 100644
index 000000000..cbf127452
--- /dev/null
+++ b/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/parser/encoding/BinaryEncodersSuite.scala
@@ -0,0 +1,89 @@
+/*
+ * Copyright 2018 ABSA Group Limited
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package za.co.absa.cobrix.cobol.parser.encoding
+
+import org.scalatest.wordspec.AnyWordSpec
+import za.co.absa.cobrix.cobol.testutils.ComparisonUtils._
+
+class BinaryEncodersSuite extends AnyWordSpec {
+  "encodeBinaryNumber" should {
+    "encode a positive integer in big-endian format" in {
+      val expected = Array(0x00, 0x00, 0x30, 0x39).map(_.toByte) // 12345 in hex
+      val actual = BinaryEncoders.encodeBinaryNumber(new java.math.BigDecimal(12345), isSigned = true, outputSize = 4, bigEndian = true, precision = 5, scale = 0, scaleFactor = 0)
+
+      assertArraysEqual(actual, expected)
+    }
+
+    "encode a positive integer in little-endian format" in {
+      val expected = Array(0x39, 0x30, 0x00, 0x00).map(_.toByte) // 12345 in hex, reversed
+      val actual = BinaryEncoders.encodeBinaryNumber(new java.math.BigDecimal(12345), isSigned = true, outputSize = 4, bigEndian = false, precision = 5, scale = 0, scaleFactor = 0)
+
+      assertArraysEqual(actual, expected)
+    }
+
+    "encode the negative integer -1 in big-endian format" in {
+      val expected = Array(0xFF, 0xFF, 0xFF, 0xFF).map(_.toByte)
+      val actual = BinaryEncoders.encodeBinaryNumber(new java.math.BigDecimal(-1), isSigned = true, outputSize = 4, bigEndian = true, precision = 5, scale = 0, scaleFactor = 0)
+
+      assertArraysEqual(actual, expected)
+    }
+
+    "encode a negative integer in big-endian format" in {
+      val expected = Array(0xFF, 0xFF, 0xCF, 0xC7).map(_.toByte) // -12345 in hex (two's complement)
+      val actual = BinaryEncoders.encodeBinaryNumber(new java.math.BigDecimal(-12345), isSigned = true, outputSize = 4, bigEndian = true, precision = 5, scale = 0, scaleFactor = 0)
+
+      assertArraysEqual(actual, expected)
+    }
+
+    "encode the negative integer -1 in little-endian format" in {
+      val expected = Array(0xFF, 0xFF, 0xFF, 0xFF).map(_.toByte)
+      val actual = BinaryEncoders.encodeBinaryNumber(new java.math.BigDecimal(-1), isSigned = true, outputSize = 4, bigEndian = false, precision = 4, scale = 0, scaleFactor = 0)
+      assertArraysEqual(actual, expected)
+    }
+
+    "encode a negative integer in little-endian format" in {
+      val expected = Array(0xC7, 0xCF, 0xFF, 0xFF).map(_.toByte) // -12345 in hex, reversed (two's complement)
+      val actual = BinaryEncoders.encodeBinaryNumber(new java.math.BigDecimal(-12345), isSigned = true, outputSize = 4, bigEndian = false, precision = 5, scale = 0, scaleFactor = 0)
+
+      assertArraysEqual(actual, expected)
+    }
+
+    "attempt to encode a number with the maximum digits of the precision" in {
+      val expected = Array(0xF1, 0xD8, 0xFF, 0xFF).map(_.toByte)
+      val actual = BinaryEncoders.encodeBinaryNumber(new java.math.BigDecimal(-9999), isSigned = true, outputSize = 4, bigEndian = false, precision = 4, scale = 0, scaleFactor = 0)
+
+      assertArraysEqual(actual, expected)
+    }
+
+    "attempt to encode a number bigger than the precision" in {
+      val expected = Array(0xF0, 0xD8, 0xFF, 0xFF).map(_.toByte)
+      val actual = BinaryEncoders.encodeBinaryNumber(new java.math.BigDecimal(-10000), isSigned = true, outputSize = 4, bigEndian = false, precision = 4, scale = 0, scaleFactor = 0)
+
+      assertArraysEqual(actual, expected)
+    }
+
+    "handle zero correctly" in {
+      val expected = Array[Byte](0x00, 0x00, 0x00, 0x00)
+
+      val actualBigEndian = BinaryEncoders.encodeBinaryNumber(new java.math.BigDecimal(0), isSigned = true, outputSize = 4, bigEndian = true, precision = 1, scale = 0, scaleFactor = 0)
+      val actualLittleEndian = BinaryEncoders.encodeBinaryNumber(new java.math.BigDecimal(0), isSigned = true, outputSize = 4, bigEndian = false, precision = 1, scale = 0, scaleFactor = 0)
+
+      assertArraysEqual(actualBigEndian, expected)
+      assertArraysEqual(actualLittleEndian, expected)
+    }
+  }
+}
diff --git a/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/testutils/ComparisonUtils.scala b/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/testutils/ComparisonUtils.scala
new file mode 100644
index 000000000..43fd53010
--- /dev/null
+++ b/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/testutils/ComparisonUtils.scala
@@ -0,0 +1,33 @@
+/*
+ * Copyright 2018 ABSA Group Limited
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package za.co.absa.cobrix.cobol.testutils
+
+import org.scalatest.Assertion
+import org.scalatest.Assertions.{fail, succeed}
+
+object ComparisonUtils {
+  def assertArraysEqual(actual: Array[Byte], expected: Array[Byte]): Assertion = {
+    if (!actual.sameElements(expected)) {
+      val actualHex = actual.map(b => f"$b%02X").mkString(" ")
+      val expectedHex = expected.map(b => f"$b%02X").mkString(" ")
+      fail(s"Actual: $actualHex\nExpected: $expectedHex")
+    } else {
+      succeed
+    }
+  }
+
+}
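Reviewer note: with the save-mode change below, `SaveMode.Append` simply adds new `part-` files to the existing output directory, uniquely named by `RawBinaryOutputFormat`. A minimal usage sketch, assuming the writer options demonstrated in the tests of this PR (the output path and field names are illustrative; the copybook matches the 6-byte records used in the writer tests):

```scala
import org.apache.spark.sql.{SaveMode, SparkSession}

val spark = SparkSession.builder().getOrCreate()
import spark.implicits._

// A copybook describing the 6-byte records written by the test data.
val copybookContents =
  """      01  RECORD.
    |          05 A PIC X(1).
    |          05 B PIC X(5).
    |""".stripMargin

val df = List(("A", "First"), ("B", "Scnd"), ("C", "Last")).toDF("A", "B")

// Appending no longer throws; each run contributes its own part files.
df.write
  .format("cobol")
  .mode(SaveMode.Append)
  .option("copybook_contents", copybookContents)
  .save("/tmp/cobol_out")
```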
diff --git a/spark-cobol/src/main/scala/za/co/absa/cobrix/spark/cobol/source/DefaultSource.scala b/spark-cobol/src/main/scala/za/co/absa/cobrix/spark/cobol/source/DefaultSource.scala
index 512bb6855..63d7ae2ad 100644
--- a/spark-cobol/src/main/scala/za/co/absa/cobrix/spark/cobol/source/DefaultSource.scala
+++ b/spark-cobol/src/main/scala/za/co/absa/cobrix/spark/cobol/source/DefaultSource.scala
@@ -84,12 +84,7 @@ class DefaultSource
           fs.delete(outputPath, true)
         }
       case SaveMode.Append =>
-        if (fs.exists(outputPath)) {
-          throw new IllegalArgumentException(
-            s"Save mode '$mode' is not supported by the 'spark-cobol' data source at the moment. " +
-            "Please use 'Overwrite' mode to write data to a file or folder."
-          )
-        }
+        // In append mode, no action is needed. Tasks will write to different files.
       case SaveMode.ErrorIfExists =>
         if (fs.exists(outputPath)) {
           throw new IllegalArgumentException(
diff --git a/spark-cobol/src/main/scala/za/co/absa/cobrix/spark/cobol/writer/RawBinaryOutputFormat.scala b/spark-cobol/src/main/scala/za/co/absa/cobrix/spark/cobol/writer/RawBinaryOutputFormat.scala
index 54115bf01..e82ac2e62 100644
--- a/spark-cobol/src/main/scala/za/co/absa/cobrix/spark/cobol/writer/RawBinaryOutputFormat.scala
+++ b/spark-cobol/src/main/scala/za/co/absa/cobrix/spark/cobol/writer/RawBinaryOutputFormat.scala
@@ -17,11 +17,13 @@
 package za.co.absa.cobrix.spark.cobol.writer
 
 import org.apache.hadoop.fs.Path
-import org.apache.hadoop.mapreduce._
 import org.apache.hadoop.io.{BytesWritable, NullWritable}
+import org.apache.hadoop.mapreduce._
 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.getOutputPath
 
 import java.io.DataOutputStream
+import java.util.UUID
 
 /**
  * A custom implementation of `FileOutputFormat` that outputs raw binary data for fixed record length
@@ -39,6 +41,29 @@ import java.io.DataOutputStream
  */
 class RawBinaryOutputFormat extends FileOutputFormat[NullWritable, BytesWritable] {
 
+  private val uniqueUuid = UUID.randomUUID().toString
+
+  override def checkOutputSpecs(job: JobContext): Unit = {
+    val outDir = getOutputPath(job)
+    if (outDir == null) throw new IllegalStateException("Output directory not set.")
+    val fs = outDir.getFileSystem(job.getConfiguration)
+    if (fs.exists(outDir) && !fs.getFileStatus(outDir).isDirectory)
+      throw new IllegalStateException(s"Output path '$outDir' is not a directory.")
+  }
+
+  override def getDefaultWorkFile(context: TaskAttemptContext, extension: String): Path = {
+    val conf = context.getConfiguration
+    val attempt = context.getTaskAttemptID
+    val writeJobId = Option(conf.get("spark.sql.sources.writeJobUUID"))
+      .orElse(Option(attempt.getJobID).map(_.toString))
+      .getOrElse(uniqueUuid)
+    val taskId = f"${attempt.getTaskID.getId}%05d"
+    val attemptId = f"c${attempt.getId}%03d"
+    val filename = s"part-$taskId-$writeJobId-$attemptId$extension"
+    val parent = super.getDefaultWorkFile(context, extension).getParent // committer work path
+    new Path(parent, filename)
+  }
+
   override def getRecordWriter(context: TaskAttemptContext): RecordWriter[NullWritable, BytesWritable] = {
     val extension = context.getConfiguration.get("cobol.writer.output.extension", ".dat")
     val path: Path = getDefaultWorkFile(context, extension)
diff --git a/spark-cobol/src/test/scala/za/co/absa/cobrix/spark/cobol/writer/FixedLengthEbcdicWriterSuite.scala b/spark-cobol/src/test/scala/za/co/absa/cobrix/spark/cobol/writer/FixedLengthEbcdicWriterSuite.scala
index 5367d9279..ef425e5b3 100644
--- a/spark-cobol/src/test/scala/za/co/absa/cobrix/spark/cobol/writer/FixedLengthEbcdicWriterSuite.scala
+++ b/spark-cobol/src/test/scala/za/co/absa/cobrix/spark/cobol/writer/FixedLengthEbcdicWriterSuite.scala
@@ -64,7 +64,7 @@ class FixedLengthEbcdicWriterSuite extends AnyWordSpec with SparkTestBase with B
     val expected = Array[Byte](
       0xC1.toByte, 0xC6.toByte, 0x89.toByte, 0x99.toByte, 0xa2.toByte, 0xa3.toByte, // A,First
       0xC2.toByte, 0xE2.toByte, 0x83.toByte, 0x95.toByte, 0x84.toByte, 0x40.toByte, // B,Scnd_
-      0xC3.toByte, 0xD3.toByte, 0x81.toByte, 0xa2.toByte, 0xa3.toByte, 0x40.toByte // C,Last_
+      0xC3.toByte, 0xD3.toByte, 0x81.toByte, 0xa2.toByte, 0xa3.toByte, 0x40.toByte  // C,Last_
     )
 
     if (!bytes.sameElements(expected)) {
@@ -114,7 +114,7 @@ class FixedLengthEbcdicWriterSuite extends AnyWordSpec with SparkTestBase with B
     val expected = Array[Byte](
       0xC1.toByte, 0x00.toByte, 0xC6.toByte, 0x89.toByte, 0x99.toByte, 0xa2.toByte, 0xa3.toByte, // A,First
       0xC2.toByte, 0x00.toByte, 0xE2.toByte, 0x83.toByte, 0x95.toByte, 0x84.toByte, 0x40.toByte, // B,Scnd_
-      0xC3.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte // C,Last_
+      0xC3.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte  // C,Last_
     )
 
     if (!bytes.sameElements(expected)) {
@@ -183,8 +183,82 @@ class FixedLengthEbcdicWriterSuite extends AnyWordSpec with SparkTestBase with B
       }
     }
 
+    "write data frames with COMP fields" in {
+      withTempDirectory("cobol_writer1") { tempDir =>
+        val df = List(
+          (1, 100.5, new java.math.BigDecimal(10.23), 1, 10050, new java.math.BigDecimal(10.12)),
+          (2, 800.4, new java.math.BigDecimal(30), 2, 80040, new java.math.BigDecimal(30)),
+          (3, 22.33, new java.math.BigDecimal(-20), 3, -2233, new java.math.BigDecimal(-20))
+        ).toDF("A", "B", "C", "D", "E", "F")
+
+        val path = new Path(tempDir, "writer1")
+
+        val copybookContentsWithBinFields =
+          """      01 RECORD.
+                      05 A PIC S9(1)      COMP.
+                      05 B PIC 9(4)V9(2)  COMP-4.
+                      05 C PIC S9(2)V9(2) BINARY.
+                      05 D PIC 9(1)       COMP-9.
+                      05 E PIC S9(6)      COMP-9.
+                      05 F PIC 9(2)V9(2)  COMP-9.
+          """
+
+        df.coalesce(1)
+          .orderBy("A")
+          .write
+          .format("cobol")
+          .mode(SaveMode.Overwrite)
+          .option("copybook_contents", copybookContentsWithBinFields)
+          .save(path.toString)
+
+        val fs = path.getFileSystem(spark.sparkContext.hadoopConfiguration)
+
+        assert(fs.exists(path), "Output directory should exist")
+        val files = fs.listStatus(path)
+          .filter(_.getPath.getName.startsWith("part-"))
+
+        assert(files.nonEmpty, "Output directory should contain part files")
+
+        val partFile = files.head.getPath
+        val data = fs.open(partFile)
+        val bytes = new Array[Byte](files.head.getLen.toInt)
+        data.readFully(bytes)
+        data.close()
+
+        // Expected binary (COMP) data for the sample rows
+        val expected = Array(
+          0x00, 0x01,             // 1 (short, big-endian)
+          0x00, 0x00, 0x27, 0x42, // 100.5 -> 10050 (int, big-endian)
+          0x03, 0xFF,             // 10.23 -> 1023 (short, big-endian)
+          0x01,                   // 1 (byte)
+          0x42, 0x27, 0x00, 0x00, // 10050 (int, little-endian)
+          0xF4, 0x03,             // 10.12 -> 1012 (short, little-endian)
+
+          0x00, 0x02,             // 2 (short, big-endian)
+          0x00, 0x01, 0x38, 0xA8, // 800.4 -> 80040 (int, big-endian)
+          0x0B, 0xB8,             // 30 -> 3000 (short, big-endian)
+          0x02,                   // 2 (byte)
+          0xA8, 0x38, 0x01, 0x00, // 80040 (int, little-endian)
+          0xB8, 0x0B,             // 30 -> 3000 (short, little-endian)
+
+          0x00, 0x03,             // 3 (short, big-endian)
+          0x00, 0x00, 0x08, 0xB9, // 22.33 -> 2233 (int, big-endian)
+          0xF8, 0x30,             // -20 -> -2000 (short, big-endian)
+          0x03,                   // 3 (byte)
+          0x47, 0xF7, 0xFF, 0xFF, // -2233 (int, little-endian)
+          0x00, 0x00              // zeros, because -20 cannot fit into the unsigned type
+        ).map(_.toByte)
+
+        if (!bytes.sameElements(expected)) {
+          println(s"Expected bytes: ${expected.map("%02X" format _).mkString(" ")}")
+          println(s"Actual bytes:   ${bytes.map("%02X" format _).mkString(" ")}")
-    "write should fail with save mode append and the path exists" in {
+          assert(bytes.sameElements(expected), "Written data should match the expected encoding")
+        }
+      }
+    }
+
+    "write should successfully append" in {
       withTempDirectory("cobol_writer3") { tempDir =>
         val df = List(("A", "First"), ("B", "Scnd"), ("C", "Last")).toDF("A", "B")
 
@@ -196,13 +270,19 @@ class FixedLengthEbcdicWriterSuite extends AnyWordSpec with SparkTestBase with B
           .option("copybook_contents", copybookContents)
           .save(path.toString)
 
-        assertThrows[IllegalArgumentException] {
-          df.write
-            .format("cobol")
-            .mode(SaveMode.Append)
-            .option("copybook_contents", copybookContents)
-            .save(path.toString)
-        }
+        df.write
+          .format("cobol")
+          .mode(SaveMode.Append)
+          .option("copybook_contents", copybookContents)
+          .save(path.toString)
+
+        val fs = path.getFileSystem(spark.sparkContext.hadoopConfiguration)
+
+        assert(fs.exists(path), "Output directory should exist")
+        val files = fs.listStatus(path)
+          .filter(_.getPath.getName.startsWith("part-"))
+
+        assert(files.length > 1)
       }
     }
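Reviewer note: a quick end-to-end sketch for trying the change — write a DataFrame with the new binary (COMP/COMP-9) encoders and read it back with the existing reader. The path and copybook are illustrative; read-side support for COMP and the COMP-9 extension already exists in spark-cobol:

```scala
import org.apache.spark.sql.{SaveMode, SparkSession}

val spark = SparkSession.builder().getOrCreate()
import spark.implicits._

// One big-endian (COMP-4) and one little-endian (COMP-9) halfword per record.
val copybook =
  """      01  RECORD.
    |          05 A PIC S9(4) COMP-4.
    |          05 B PIC S9(4) COMP-9.
    |""".stripMargin

val df = List((1, 1), (-2, -2), (300, 300)).toDF("A", "B")

df.write
  .format("cobol")
  .mode(SaveMode.Overwrite)
  .option("copybook_contents", copybook)
  .save("/tmp/comp_roundtrip")

// Round-trip check using the existing spark-cobol reader.
spark.read
  .format("cobol")
  .option("copybook_contents", copybook)
  .load("/tmp/comp_roundtrip")
  .show()
```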