diff --git a/README.md b/README.md
index 87a1de241..754d68ce4 100644
--- a/README.md
+++ b/README.md
@@ -485,6 +485,13 @@ If the record field contains a string that can be mapped to a record size, you c
 .option("record_length_map", """{"SEG1":100,"SEG2":200}""")
 ```
 
+You can specify a default record size by defining the special key "_":
+```
+.option("record_format", "F")
+.option("record_length_field", "FIELD_STR")
+.option("record_length_map", """{"SEG1":100,"SEG2":200,"_":100}""")
+```
+
 ### Use cases for various variable length formats
 
 In order to understand the file format it is often sufficient to look at the first 4 bytes of the file (in case of RDW only files),
diff --git a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/reader/extractors/raw/FixedWithRecordLengthExprRawRecordExtractor.scala b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/reader/extractors/raw/FixedWithRecordLengthExprRawRecordExtractor.scala
index ba57a6718..ef1a9c00c 100644
--- a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/reader/extractors/raw/FixedWithRecordLengthExprRawRecordExtractor.scala
+++ b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/reader/extractors/raw/FixedWithRecordLengthExprRawRecordExtractor.scala
@@ -32,6 +32,7 @@ class FixedWithRecordLengthExprRawRecordExtractor(ctx: RawRecordContext,
 
   final private val lengthField = recordLengthField.map(_.field)
   final private val lengthMap = recordLengthField.map(_.valueMap).getOrElse(Map.empty)
+  final private val defaultRecordLength = lengthMap.get("_")
   final private val isLengthMapEmpty = lengthMap.isEmpty
 
   type RawRecord = (String, Array[Byte])
@@ -131,8 +132,8 @@ class FixedWithRecordLengthExprRawRecordExtractor(ctx: RawRecordContext,
         case i: Int => getRecordLengthFromMapping(i.toString)
         case l: Long => getRecordLengthFromMapping(l.toString)
         case s: String => getRecordLengthFromMapping(s)
-        case null => throw new IllegalStateException(s"Null encountered as a record length field (offset: $byteIndex, raw value: ${getBytesAsHexString(binaryDataStart)}).")
-        case _ => throw new IllegalStateException(s"Record length value of the field ${lengthAST.name} must be an integral type.")
+        case null => defaultRecordLength.getOrElse(throw new IllegalStateException(s"Null encountered as a record length field (offset: $byteIndex, raw value: ${getBytesAsHexString(binaryDataStart)})."))
+        case _ => throw new IllegalStateException(s"Record length value of the field ${lengthAST.name} must be an integral type.")
       }
     }
     length + recordLengthAdjustment
@@ -141,7 +142,7 @@ class FixedWithRecordLengthExprRawRecordExtractor(ctx: RawRecordContext,
   final private def getRecordLengthFromMapping(v: String): Int = {
     lengthMap.get(v) match {
       case Some(len) => len
-      case None => throw new IllegalStateException(s"Record length value '$v' is not mapped to a record length.")
+      case None => defaultRecordLength.getOrElse(throw new IllegalStateException(s"Record length value '$v' is not mapped to a record length."))
     }
   }
 
diff --git a/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/reader/iterator/VRLRecordReaderSpec.scala b/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/reader/iterator/VRLRecordReaderSpec.scala
index 4992c2c2b..c7bc2d522 100644
--- a/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/reader/iterator/VRLRecordReaderSpec.scala
+++ b/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/reader/iterator/VRLRecordReaderSpec.scala
@@ -225,6 +225,58 @@ class VRLRecordReaderSpec extends AnyWordSpec {
 
       assert(ex.getMessage == "The record length field LEN must be an integral type or a value mapping must be specified.")
     }
+
+    "the length mapping with default record length" in {
+      val copybookWithLengthMap =
+        """      01  RECORD.
+              05  LEN_SPEC       PIC X(1).
+              05  N              PIC 9(2).
+              05  A              PIC X(3).
+        """
+
+      val records = Array(
+        0xC1, 0xF1, 0xF2, 0xC1,
+        0xC2, 0xF3, 0xF4, 0xC2, 0xC3,
+        0xC3, 0xF5, 0xF6, 0xC4, 0xC5, 0xC6
+      ).map(_.toByte)
+
+      val streamH = new ByteStreamMock(records)
+      val streamD = new ByteStreamMock(records)
+      val context = RawRecordContext(0, streamH, streamD, CopybookParser.parseSimple(copybookWithLengthMap), null, null, "")
+
+      val readerParameters = ReaderParameters(
+        lengthFieldExpression = Some("LEN_SPEC"),
+        lengthFieldMap = Map("A" -> 4, "B" -> 5, "_" -> 6))
+
+      val reader = getUseCase(
+        copybook = copybookWithLengthMap,
+        records = records,
+        lengthFieldExpression = Some("LEN_SPEC"),
+        recordExtractor = Some(new FixedWithRecordLengthExprRawRecordExtractor(context, readerParameters)))
+
+      assert(reader.hasNext)
+      val (segment1, record1) = reader.next()
+      assert(reader.hasNext)
+      val (segment2, record2) = reader.next()
+      assert(reader.hasNext)
+      val (segment3, record3) = reader.next()
+      assert(!reader.hasNext)
+
+      assert(segment1.isEmpty)
+      assert(segment2.isEmpty)
+      assert(segment3.isEmpty)
+      assert(record1.length == 4)
+      assert(record2.length == 5)
+      assert(record3.length == 6)
+      assert(record1(0) == 0xC1.toByte)
+      assert(record1(1) == 0xF1.toByte)
+      assert(record1(2) == 0xF2.toByte)
+      assert(record1(3) == 0xC1.toByte)
+      assert(record2(0) == 0xC2.toByte)
+      assert(record2(1) == 0xF3.toByte)
+      assert(record3(0) == 0xC3.toByte)
+      assert(record3(1) == 0xF5.toByte)
+    }
   }
 
   "work with record length expressions" in {
diff --git a/pom.xml b/pom.xml
index 4a118ae6e..ec3d371eb 100644
--- a/pom.xml
+++ b/pom.xml
@@ -367,46 +367,6 @@
-            <plugin>
-                <groupId>org.apache.rat</groupId>
-                <artifactId>apache-rat-plugin</artifactId>
-                <version>${maven.rat.plugin.version}</version>
-                <executions>
-                    <execution>
-                        <phase>verify</phase>
-                        <goals>
-                            <goal>check</goal>
-                        </goals>
-                    </execution>
-                </executions>
-                <configuration>
-                    <excludes>
-                        <exclude>**/*.sbt</exclude>
-                        <exclude>**/*.properties</exclude>
-                        <exclude>**/*.json</exclude>
-                        <exclude>**/*.csv</exclude>
-                        <exclude>**/*.txt</exclude>
-                        <exclude>**/*.bin</exclude>
-                        <exclude>**/*.md</exclude>
-                        <exclude>**/*.iml</exclude>
-                        <exclude>**/*.csv</exclude>
-                        <exclude>**/*.cob</exclude>
-                        <exclude>**/*.cpy</exclude>
-                        <exclude>**/*.svg</exclude>
-                        <exclude>**/*.plot</exclude>
-                        <exclude>**/*.yml</exclude>
-                        <exclude>**/*.interp</exclude>
-                        <exclude>**/*.tokens</exclude>
-                        <exclude>**/_*</exclude>
-                        <exclude>**/dependency-reduced-pom.xml</exclude>
-                        <exclude>**/.idea/**</exclude>
-                        <exclude>**/target/**</exclude>
-                        <exclude>**/org.apache.spark.sql.sources.DataSourceRegister</exclude>
-                        <exclude>dependency-reduced-pom.xml</exclude>
-                        <exclude>.github/CODEOWNERS</exclude>
-                    </excludes>
-                </configuration>
-            </plugin>
-
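
For reference, a minimal usage sketch of the new default-length mapping from the Spark side (assuming the standard `spark-cobol` data source; the copybook path, data path, and the `FIELD_STR` field name are illustrative):

```scala
// Fixed-format file whose record length is derived from the string field FIELD_STR.
// Values present in the map ("SEG1", "SEG2") use their mapped lengths; any other
// value, including null, now falls back to the "_" default instead of throwing
// an IllegalStateException.
val df = spark.read
  .format("cobol")
  .option("copybook", "/path/to/copybook.cpy")
  .option("record_format", "F")
  .option("record_length_field", "FIELD_STR")
  .option("record_length_map", """{"SEG1":100,"SEG2":200,"_":100}""")
  .load("/path/to/data")
```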