24
24
import java .io .*;
25
25
import java .math .BigInteger ;
26
26
import java .util .ArrayList ;
27
- import java .util .Collection ;
28
27
import java .util .Collections ;
29
28
import java .util .List ;
29
+ import java .util .regex .Pattern ;
30
30
import java .util .stream .Collectors ;
31
31
import java .util .stream .IntStream ;
32
32
import java .util .stream .Stream ;
33
33
34
34
public class SplitPartitions {
35
35
36
36
public static Logger logger = LoggerFactory .getLogger (SplitPartitions .class .getName ());
37
+ private static final int MAX_NUM_PARTS_FOR_PARTITION_FILE = 10 ;
37
38
38
39
public static List <Partition > getRandomSubPartitions (int numSplits , BigInteger min , BigInteger max , int coveragePercent ) {
39
40
logger .info ("ThreadID: {} Splitting min: {} max: {}" , Thread .currentThread ().getId (), min , max );
@@ -48,16 +49,23 @@ public static List<Partition> getRandomSubPartitions(int numSplits, BigInteger m
48
49
public static List <Partition > getSubPartitionsFromFile (int numSplits , String inputFilename ) throws IOException {
49
50
logger .info ("ThreadID: {} Splitting partitions in file: {} using a split-size of {}"
50
51
, Thread .currentThread ().getId (), inputFilename , numSplits );
52
+ if (numSplits > 10 ) {
53
+ logger .warn ("Resetting spark.cdm.perfops.numParts value of {} to max allowed value of {} when using a partition file: {}" ,
54
+ numSplits , MAX_NUM_PARTS_FOR_PARTITION_FILE , inputFilename );
55
+ numSplits = MAX_NUM_PARTS_FOR_PARTITION_FILE ;
56
+ }
51
57
List <Partition > partitions = new ArrayList <Partition >();
52
58
BufferedReader reader = getfileReader (inputFilename );
53
59
String line = null ;
60
+ PartitionMinMax pMinMax ;
54
61
while ((line = reader .readLine ()) != null ) {
55
- if (line .startsWith ("#" )) {
56
- continue ;
57
- }
58
- String [] minMax = line .split ("," );
59
62
try {
60
- partitions .addAll (getSubPartitions (numSplits , new BigInteger (minMax [0 ]), new BigInteger (minMax [1 ]), 100 ));
63
+ pMinMax = new PartitionMinMax (line );
64
+ if (pMinMax .hasError ) {
65
+ logger .error ("Skipping " + pMinMax .error );
66
+ continue ;
67
+ }
68
+ partitions .addAll (getSubPartitions (numSplits , pMinMax .min , pMinMax .max , 100 ));
61
69
} catch (Exception e ) {
62
70
logger .error ("Skipping partition: {}" , line , e );
63
71
}
@@ -66,6 +74,26 @@ public static List<Partition> getSubPartitionsFromFile(int numSplits, String inp
66
74
return partitions ;
67
75
}
68
76
77
+ static class PartitionMinMax {
78
+ static final Pattern pat = Pattern .compile ("^-?[0-9]*,-?[0-9]*" );
79
+ public BigInteger min ;
80
+ public BigInteger max ;
81
+ public boolean hasError = false ;
82
+ public String error ;
83
+
84
+ public PartitionMinMax (String line ) {
85
+ line = line .replaceAll (" " , "" );
86
+ if (!pat .matcher (line ).matches ()) {
87
+ error = "Invaliding partition line: " + line ;
88
+ hasError = true ;
89
+ return ;
90
+ }
91
+ String [] minMax = line .split ("," );
92
+ min = new BigInteger (minMax [0 ]);
93
+ max = new BigInteger (minMax [1 ]);
94
+ }
95
+ }
96
+
69
97
public static List <PKRows > getRowPartsFromFile (int numSplits , String inputFilename ) throws IOException {
70
98
logger .info ("ThreadID: {} Splitting rows in file: {} using a split-size of {}"
71
99
, Thread .currentThread ().getId (), inputFilename , numSplits );
0 commit comments