Skip to content

Commit 217f926

Browse files
Multiple input paths
1 parent 090a1dc commit 217f926

File tree

3 files changed

+21
-17
lines changed

3 files changed

+21
-17
lines changed

README.md

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,27 +18,27 @@ Build jar from source with
1818
and find the output JAR file as `build/libs/restructurehdfs-0.3.3-all.jar`. Then run with:
1919

2020
```shell
21-
java -jar restructurehdfs-0.3.3-all.jar --hdfs-uri <webhdfs_url> --hdfs-root-directory <hdfs_topic_path> --output-directory <output_folder>
21+
java -jar restructurehdfs-0.3.3-all.jar --hdfs-uri <webhdfs_url> --output-directory <output_folder> <input_path_1> [<input_path_2> ...]
2222
```
2323
Alternatively, you can use the short form of the options:
2424
```shell
25-
java -jar restructurehdfs-0.3.3-all.jar -u <webhdfs_url> -i <hdfs_topic_path> -o <output_folder>
25+
java -jar restructurehdfs-0.3.3-all.jar -u <webhdfs_url> -o <output_folder> <input_path_1> [<input_path_2> ...]
2626
```
2727

2828
To display the usage and all available options, use the help option as follows:
2929
```shell
3030
java -jar restructurehdfs-0.3.3-all.jar --help
3131
```
32-
Note that the options preceded by the `*` in the above output are required to run the app.
32+
Note that the options preceded by `*` in the above output are required to run the app. Also note that multiple input paths can be given from which to read the files, e.g. `/topicAndroidNew/topic1 /topicAndroidNew/topic2 ...`. At least one input path is required.
3333

3434
By default, this will output the data in CSV format. If JSON format is preferred, use the following instead:
3535
```shell
36-
java -jar restructurehdfs-0.3.3-all.jar --format json --hdfs-uri <webhdfs_url> --hdfs-root-directory <hdfs_topic_path> --output-directory <output_folder>
36+
java -jar restructurehdfs-0.3.3-all.jar --format json --hdfs-uri <webhdfs_url> --output-directory <output_folder> <input_path_1> [<input_path_2> ...]
3737
```
3838

3939
Another option is to output the data in compressed form. All files will get the `gz` suffix, and can be decompressed with a GZIP decoder. Note that for a very small number of records, this may actually increase the file size.
4040
```shell
41-
java -jar restructurehdfs-0.3.3-all.jar --compression gzip --hdfs-uri <webhdfs_url> --hdfs-root-directory <hdfs_topic_path> --output-directory <output_folder>
41+
java -jar restructurehdfs-0.3.3-all.jar --compression gzip --hdfs-uri <webhdfs_url> --output-directory <output_folder> <input_path_1> [<input_path_2> ...]
4242
```
4343

4444
Finally, by default, file records are not deduplicated after writing. To enable this behaviour, specify the option `--deduplicate` or `-d`. This is set to false by default because of an issue with Biovotion data. Please see [issue #16](https://github.com/RADAR-base/Restructure-HDFS-topic/issues/16) before enabling it.

src/main/java/org/radarcns/RestructureAvroRecords.java

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -72,14 +72,14 @@ public class RestructureAvroRecords {
7272

7373
private long processedFileCount;
7474
private long processedRecordsCount;
75-
private boolean useGzip;
76-
private boolean doDeduplicate;
77-
78-
private static final CommandLineArgs commandLineArgs = new CommandLineArgs();
79-
private static final JCommander parser = JCommander.newBuilder().addObject(commandLineArgs).build();
75+
private final boolean useGzip;
76+
private final boolean doDeduplicate;
8077

8178
public static void main(String [] args) throws Exception {
8279

80+
final CommandLineArgs commandLineArgs = new CommandLineArgs();
81+
final JCommander parser = JCommander.newBuilder().addObject(commandLineArgs).build();
82+
8383
parser.setProgramName("hadoop jar restructurehdfs-all-0.3.3.jar");
8484
parser.parse(args);
8585

@@ -90,9 +90,6 @@ public static void main(String [] args) throws Exception {
9090

9191
logger.info(new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(new Date()));
9292
logger.info("Starting...");
93-
logger.info("In: " + commandLineArgs.hdfsUri + commandLineArgs.hdfsRootDirectory);
94-
logger.info("Out: " + commandLineArgs.outputDirectory);
95-
9693

9794
long time1 = System.currentTimeMillis();
9895

@@ -103,7 +100,11 @@ public static void main(String [] args) throws Exception {
103100
.build();
104101

105102
try {
106-
restr.start(commandLineArgs.hdfsRootDirectory);
103+
for(String input : commandLineArgs.inputPaths) {
104+
logger.info("In: " + commandLineArgs.hdfsUri + input);
105+
logger.info("Out: " + commandLineArgs.outputDirectory);
106+
restr.start(input);
107+
}
107108
} catch (IOException ex) {
108109
logger.error("Processing failed", ex);
109110
}

src/main/java/org/radarcns/util/commandline/CommandLineArgs.java

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,14 @@
22

33
import com.beust.jcommander.Parameter;
44

5+
import java.util.ArrayList;
6+
import java.util.List;
7+
58
public class CommandLineArgs {
69

10+
@Parameter(description = "<input_path_1> [<input_path_2> ...]", variableArity = true, required = true)
11+
public List<String> inputPaths = new ArrayList<>();
12+
713
@Parameter(names = { "-f", "--format" }, description = "Format to use when converting the files. JSON and CSV is available.")
814
public String format = "csv";
915

@@ -17,9 +23,6 @@ public class CommandLineArgs {
1723
@Parameter(names = { "-u", "--hdfs-uri" }, description = "The HDFS uri to connect to. Eg - 'hdfs://<HOST>:<RPC_PORT>/<PATH>'.", required = true, validateWith = { HdfsUriValidator.class, PathValidator.class })
1824
public String hdfsUri;
1925

20-
@Parameter(names = { "-i", "--hdfs-root-directory" }, description = "The input HDFS root directory from which files are to be read. Eg - '/topicAndroidNew'", required = true, validateWith = PathValidator.class)
21-
public String hdfsRootDirectory;
22-
2326
@Parameter(names = { "-o", "--output-directory"}, description = "The output folder where the files are to be extracted.", required = true, validateWith = PathValidator.class)
2427
public String outputDirectory;
2528

0 commit comments

Comments
 (0)