|
17 | 17 | package za.co.absa.cobrix.cobol.reader.parameters |
18 | 18 |
|
19 | 19 | /** |
20 | | - * This class holds the parameters currently used for parsing variable-length records. |
| 20 | + * This class is used to hold the parameters currently used for parsing variable-length records. |
21 | 21 | * |
22 | | - * @param isRecordSequence Does input files have 4 byte record length headers |
23 | | - * @param bdw Block descriptor word (if specified), for FB and VB record formats |
24 | | - * @param isRdwBigEndian Is RDW big endian? It may depend on flavor of mainframe and/or mainframe to PC transfer method |
25 | | - * @param isRdwPartRecLength Does RDW count itself as part of record length itself |
26 | | - * @param rdwAdjustment Controls a mismatch between RDW and record length |
27 | | - * @param recordHeaderParser An optional custom record header parser for non-standard RDWs |
28 | | - * @param recordExtractor An optional custom raw record parser class non-standard record types |
29 | | - * @param rhpAdditionalInfo An optional additional option string passed to a custom record header parser |
30 | | - * @param reAdditionalInfo An optional additional option string passed to a custom record extractor |
31 | | - * @param recordLengthField A field that stores record length |
32 | | - * @param recordLengthMap A mapping between field value and record size. |
33 | | - * @param fileStartOffset A number of bytes to skip at the beginning of each file |
34 | | - * @param fileEndOffset A number of bytes to skip at the end of each file |
35 | | - * @param generateRecordId Generate a sequential record number for each record to be able to retain the order of the original data |
36 | | - * @param isUsingIndex Is indexing input file before processing is requested |
37 | | - * @param isIndexCachingAllowed Is caching of generated index allowed |
38 | | - * @param inputSplitSizeMB A partition size to target. In certain circumstances this size may not be exactly that, but the library will do the best effort to target that size |
39 | | - * @param inputSplitRecords The number of records to include in each partition. Notice mainframe records may have variable size, inputSplitMB is the recommended option |
40 | | - * @param improveLocality Tries to improve locality by extracting preferred locations for variable-length records |
41 | | - * @param optimizeAllocation Optimizes cluster usage in case of optimization for locality in the presence of new nodes (nodes that do not contain any blocks of the files being processed) |
42 | | - * @param inputFileNameColumn A column name to add to the dataframe. The column will contain input file name for each record similar to 'input_file_name()' function |
| 22 | + * @param isRecordSequence Do input files have 4 byte record length headers |
| 23 | + * @param bdw Block descriptor word (if specified), for FB and VB record formats |
| 24 | + * @param isRdwBigEndian Is RDW big endian? It may depend on flavor of mainframe and/or mainframe to PC transfer method |
| 25 | + * @param isRdwPartRecLength Does the RDW count itself as part of the record length |
| 26 | + * @param rdwAdjustment Controls a mismatch between RDW and record length |
| 27 | + * @param recordHeaderParser An optional custom record header parser for non-standard RDWs |
| 28 | + * @param recordExtractor An optional custom raw record parser class for non-standard record types |
| 29 | + * @param rhpAdditionalInfo An optional additional option string passed to a custom record header parser |
| 30 | + * @param reAdditionalInfo An optional additional option string passed to a custom record extractor |
| 31 | + * @param recordLengthField A field that stores record length |
| 32 | + * @param recordLengthMap A mapping between field value and record size. |
| 33 | + * @param fileStartOffset A number of bytes to skip at the beginning of each file |
| 34 | + * @param fileEndOffset A number of bytes to skip at the end of each file |
| 35 | + * @param generateRecordId Generate a sequential record number for each record to be able to retain the order of the original data |
| 36 | + * @param isUsingIndex Is indexing the input file before processing requested |
| 37 | + * @param isIndexCachingAllowed Is caching of generated index allowed |
| 38 | + * @param inputSplitSizeMB A partition size to target. In certain circumstances the actual size may differ, but the library will make a best effort to target it |
| 39 | + * @param inputSplitSizeCompressedMB A partition size to target for compressed files. |
| 40 | + * @param inputSplitRecords The number of records to include in each partition. Note that mainframe records may have a variable size; inputSplitSizeMB is the recommended option |
| 41 | + * @param improveLocality Tries to improve locality by extracting preferred locations for variable-length records |
| 42 | + * @param optimizeAllocation Optimizes cluster usage in case of optimization for locality in the presence of new nodes (nodes that do not contain any blocks of the files being processed) |
| 43 | + * @param inputFileNameColumn A column name to add to the dataframe. The column will contain the input file name for each record, similar to the 'input_file_name()' function |
43 | 44 | */ |
44 | 45 | case class VariableLengthParameters( |
45 | | - isRecordSequence: Boolean, // [deprecated by recordFormat] |
46 | | - bdw: Option[Bdw], |
47 | | - isRdwBigEndian: Boolean, |
48 | | - isRdwPartRecLength: Boolean, |
49 | | - rdwAdjustment: Int, |
50 | | - recordHeaderParser: Option[String], |
51 | | - recordExtractor: Option[String], |
52 | | - rhpAdditionalInfo: Option[String], |
53 | | - reAdditionalInfo: String, |
54 | | - recordLengthField: String, |
55 | | - recordLengthMap: Map[String, Int], |
56 | | - fileStartOffset: Int, |
57 | | - fileEndOffset: Int, |
58 | | - generateRecordId: Boolean, |
59 | | - isUsingIndex: Boolean, |
60 | | - isIndexCachingAllowed: Boolean, |
61 | | - inputSplitRecords: Option[Int], |
62 | | - inputSplitSizeMB: Option[Int], |
63 | | - improveLocality: Boolean, |
64 | | - optimizeAllocation: Boolean, |
65 | | - inputFileNameColumn: String, |
66 | | - occursMappings: Map[String, Map[String, Int]] |
| 46 | + isRecordSequence: Boolean, // [deprecated by recordFormat] |
| 47 | + bdw: Option[Bdw], |
| 48 | + isRdwBigEndian: Boolean, |
| 49 | + isRdwPartRecLength: Boolean, |
| 50 | + rdwAdjustment: Int, |
| 51 | + recordHeaderParser: Option[String], |
| 52 | + recordExtractor: Option[String], |
| 53 | + rhpAdditionalInfo: Option[String], |
| 54 | + reAdditionalInfo: String, |
| 55 | + recordLengthField: String, |
| 56 | + recordLengthMap: Map[String, Int], |
| 57 | + fileStartOffset: Int, |
| 58 | + fileEndOffset: Int, |
| 59 | + generateRecordId: Boolean, |
| 60 | + isUsingIndex: Boolean, |
| 61 | + isIndexCachingAllowed: Boolean, |
| 62 | + inputSplitRecords: Option[Int], |
| 63 | + inputSplitSizeMB: Option[Int], |
| 64 | + inputSplitSizeCompressedMB: Option[Int], |
| 65 | + improveLocality: Boolean, |
| 66 | + optimizeAllocation: Boolean, |
| 67 | + inputFileNameColumn: String, |
| 68 | + occursMappings: Map[String, Map[String, Int]] |
67 | 69 | ) |
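For context, below is a minimal, illustrative sketch of constructing the updated case class after this change. The field values are assumptions chosen for an RDW-delimited input and are not the library's defaults; only the signature shown in the diff above is confirmed.

import za.co.absa.cobrix.cobol.reader.parameters.VariableLengthParameters

// Illustrative values only; every field must be supplied since the case class declares no defaults.
val varLenParams = VariableLengthParameters(
  isRecordSequence = true,                 // input has 4-byte RDW headers [deprecated by recordFormat]
  bdw = None,                              // no block descriptor words
  isRdwBigEndian = true,
  isRdwPartRecLength = false,
  rdwAdjustment = 0,
  recordHeaderParser = None,
  recordExtractor = None,
  rhpAdditionalInfo = None,
  reAdditionalInfo = "",
  recordLengthField = "",                  // e.g. "RECORD_TYPE" when a field drives the record length
  recordLengthMap = Map.empty,             // e.g. Map("A" -> 100, "B" -> 200) for the field above
  fileStartOffset = 0,
  fileEndOffset = 0,
  generateRecordId = false,
  isUsingIndex = true,
  isIndexCachingAllowed = true,
  inputSplitRecords = None,
  inputSplitSizeMB = Some(100),            // target split size for uncompressed input
  inputSplitSizeCompressedMB = Some(20),   // the new parameter introduced by this change
  improveLocality = true,
  optimizeAllocation = false,
  inputFileNameColumn = "",                // set to e.g. "input_file" to add the column
  occursMappings = Map.empty
)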