Skip to content

Commit cdeffd2

Browse files
authored
Merge pull request #42 from RADAR-base/release-0.5.5
Release 0.5.5
2 parents af1d14b + 7c4c57f commit cdeffd2

28 files changed

+320
-146
lines changed

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ Data streamed to HDFS using the [RADAR HDFS sink connector](https://github.com/R
88

99
This package is available as the Docker image [`radarbase/radar-hdfs-restructure`](https://hub.docker.com/r/radarbase/radar-hdfs-restructure). The entrypoint of the image is the current application. So in all of the commands listed in usage, replace `radar-hdfs-restructure` with, for example:
1010
```shell
11-
docker run --rm -t --network hadoop -v "$PWD/output:/output" radarbase/radar-hdfs-restructure:0.5.3 -n hdfs-namenode -o /output /myTopic
11+
docker run --rm -t --network hadoop -v "$PWD/output:/output" radarbase/radar-hdfs-restructure:0.5.5 -n hdfs-namenode -o /output /myTopic
1212
```
1313
if your Docker cluster is running in the `hadoop` network and your output directory should be `./output`.
1414

@@ -23,7 +23,7 @@ This package requires at least Java JDK 8. Build the distribution with
2323
and install the package into `/usr/local` with, for example,
2424
```shell
2525
sudo mkdir -p /usr/local
26-
sudo tar -xzf build/distributions/radar-hdfs-restructure-0.5.3.tar.gz -C /usr/local --strip-components=1
26+
sudo tar -xzf build/distributions/radar-hdfs-restructure-0.5.5.tar.gz -C /usr/local --strip-components=1
2727
```
2828

2929
Now the `radar-hdfs-restructure` command should be available.

build.gradle

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,12 @@ plugins {
22
id 'java'
33
id 'java-library'
44
id 'application'
5-
id 'com.jfrog.bintray' version '1.8.1'
5+
id 'com.jfrog.bintray' version '1.8.4'
66
id 'maven-publish'
77
}
88

99
group 'org.radarcns'
10-
version '0.5.4'
10+
version '0.5.5'
1111
mainClassName = 'org.radarcns.hdfs.Application'
1212

1313
sourceCompatibility = '1.8'
@@ -25,6 +25,7 @@ ext {
2525
hadoopVersion = '3.0.3'
2626
jCommanderVersion = '1.72'
2727
almworksVersion = '1.1.1'
28+
junitVersion = '5.4.0-M1'
2829
}
2930

3031
repositories {
@@ -43,7 +44,9 @@ dependencies {
4344

4445
runtimeOnly group: 'org.apache.hadoop', name: 'hadoop-hdfs-client', version: hadoopVersion
4546

46-
testImplementation group: 'junit', name: 'junit', version: '4.12'
47+
testCompile group: 'org.junit.jupiter', name: 'junit-jupiter-api', version: junitVersion
48+
testCompile group: 'org.junit.jupiter', name: 'junit-jupiter-params', version: junitVersion
49+
testRuntime group: 'org.junit.jupiter', name: 'junit-jupiter-engine', version: junitVersion
4750
}
4851

4952
ext.sharedManifest = manifest {
@@ -80,9 +83,17 @@ startScripts {
8083
}
8184
}
8285

86+
test {
87+
useJUnitPlatform()
88+
testLogging {
89+
events "passed", "skipped", "failed"
90+
}
91+
}
92+
93+
8394
tasks.withType(Tar){
8495
compression = Compression.GZIP
85-
extension = 'tar.gz'
96+
archiveExtension.set('tar.gz')
8697
}
8798

8899
task downloadDependencies {
@@ -93,18 +104,18 @@ task downloadDependencies {
93104

94105
task copyDependencies(type: Copy) {
95106
from configurations.runtimeClasspath.files
96-
into "${buildDir}/third-party/"
107+
into "$buildDir/third-party/"
97108
}
98109

99110
// custom tasks for creating source/javadoc jars
100111
task sourcesJar(type: Jar, dependsOn: classes) {
101-
classifier = 'sources'
112+
archiveClassifier.set('sources')
102113
from sourceSets.main.allSource
103114
manifest.from sharedManifest
104115
}
105116

106117
task javadocJar(type: Jar, dependsOn: javadoc) {
107-
classifier = 'javadoc'
118+
archiveClassifier.set('javadoc')
108119
from javadoc.destinationDir
109120
manifest.from sharedManifest
110121
}
@@ -179,5 +190,5 @@ bintray {
179190
}
180191

181192
wrapper {
182-
gradleVersion '4.10.2'
193+
gradleVersion '5.2.1'
183194
}

gradle/wrapper/gradle-wrapper.jar

-987 Bytes
Binary file not shown.
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
distributionBase=GRADLE_USER_HOME
22
distributionPath=wrapper/dists
3-
distributionUrl=https\://services.gradle.org/distributions/gradle-4.10.2-bin.zip
3+
distributionUrl=https\://services.gradle.org/distributions/gradle-5.2.1-bin.zip
44
zipStoreBase=GRADLE_USER_HOME
55
zipStorePath=wrapper/dists

gradlew

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ APP_NAME="Gradle"
2828
APP_BASE_NAME=`basename "$0"`
2929

3030
# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
31-
DEFAULT_JVM_OPTS=""
31+
DEFAULT_JVM_OPTS='"-Xmx64m"'
3232

3333
# Use the maximum available, or set MAX_FD != -1 to use that value.
3434
MAX_FD="maximum"

gradlew.bat

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ set APP_BASE_NAME=%~n0
1414
set APP_HOME=%DIRNAME%
1515

1616
@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
17-
set DEFAULT_JVM_OPTS=
17+
set DEFAULT_JVM_OPTS="-Xmx64m"
1818

1919
@rem Find java.exe
2020
if defined JAVA_HOME goto findJavaFromJavaHome

settings.gradle

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1 @@
11
rootProject.name = 'radar-hdfs-restructure'
2-
enableFeaturePreview('STABLE_PUBLISHING')

src/main/java/org/radarcns/hdfs/Application.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,7 @@ public static void main(String [] args) {
115115
.tempDir(commandLineArgs.tmpDir)
116116
.numThreads(commandLineArgs.numThreads)
117117
.maxFilesPerTopic(commandLineArgs.maxFilesPerTopic)
118+
.excludeTopics(commandLineArgs.excludeTopics)
118119
.build();
119120

120121
HdfsSettings hdfsSettings = new HdfsSettings.Builder(commandLineArgs.hdfsName)
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
/*
2+
* Copyright 2018 The Hyve
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package org.radarcns.hdfs;
18+
19+
import static java.time.ZoneOffset.UTC;
20+
21+
import java.time.format.DateTimeFormatter;
22+
23+
public class MonthlyObservationKeyPathFactory extends ObservationKeyPathFactory {
24+
private static final DateTimeFormatter PER_MONTH_HOURLY_TIME_BIN_FORMAT = DateTimeFormatter.ofPattern("yyyyMM/yyyyMMdd_HH'00'")
25+
.withZone(UTC);
26+
27+
28+
public DateTimeFormatter getTimeBinFormat() {
29+
return PER_MONTH_HOURLY_TIME_BIN_FORMAT;
30+
}
31+
}

src/main/java/org/radarcns/hdfs/RadarHdfsRestructure.java

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -58,13 +58,14 @@ public class RadarHdfsRestructure {
5858
private static final Logger logger = LoggerFactory.getLogger(RadarHdfsRestructure.class);
5959

6060
/** Number of offsets to process in a single task. */
61-
private static final int BATCH_SIZE = 500_000;
61+
private static final long BATCH_SIZE = 500_000;
6262

6363
private final int numThreads;
6464
private final Configuration conf;
6565
private final FileStoreFactory fileStoreFactory;
6666
private final RecordPathFactory pathFactory;
6767
private final long maxFilesPerTopic;
68+
private List<String> excludeTopics;
6869

6970
private LongAdder processedFileCount;
7071
private LongAdder processedRecordsCount;
@@ -78,6 +79,7 @@ public RadarHdfsRestructure(FileStoreFactory factory) {
7879
maxFiles = Long.MAX_VALUE;
7980
}
8081
this.maxFilesPerTopic = maxFiles;
82+
this.excludeTopics = factory.getSettings().getExcludeTopics();
8183
this.fileStoreFactory = factory;
8284
this.pathFactory = factory.getPathFactory();
8385
}
@@ -115,6 +117,7 @@ private List<TopicFileList> getTopicPaths(FileSystem fs, Path path, OffsetRangeS
115117
Map<String, List<TopicFile>> topics = walk(fs, path)
116118
.filter(f -> f.getName().endsWith(".avro"))
117119
.map(f -> new TopicFile(f.getParent().getParent().getName(), f))
120+
.filter(f -> !excludeTopics.contains(f.topic))
118121
.filter(f -> !seenFiles.contains(f.range))
119122
.collect(Collectors.groupingBy(TopicFile::getTopic));
120123

@@ -174,8 +177,8 @@ private void processPaths(List<TopicFileList> topicPaths, Accountant accountant)
174177
String topic = paths.files.get(0).topic;
175178
logger.info("Processing {} records for topic {}", size, topic);
176179
executor.execute(() -> {
177-
int batchSize = (int)(BATCH_SIZE * ThreadLocalRandom.current().nextDouble(0.75, 1.25));
178-
int currentSize = 0;
180+
long batchSize = Math.round(BATCH_SIZE * ThreadLocalRandom.current().nextDouble(0.75, 1.25));
181+
long currentSize = 0;
179182
try (FileCacheStore cache = fileStoreFactory.newFileCacheStore(accountant)) {
180183
for (TopicFile file : paths.files) {
181184
try {
@@ -280,7 +283,7 @@ private static class TopicFileList {
280283
public TopicFileList(Stream<TopicFile> files) {
281284
this.files = files.collect(Collectors.toList());
282285
this.size = this.files.stream()
283-
.mapToInt(TopicFile::size)
286+
.mapToLong(TopicFile::size)
284287
.sum();
285288
}
286289

@@ -308,8 +311,8 @@ public String getTopic() {
308311
return topic;
309312
}
310313

311-
public int size() {
312-
return 1 + (int) (range.getOffsetTo() - range.getOffsetFrom());
314+
public long size() {
315+
return 1 + range.getOffsetTo() - range.getOffsetFrom();
313316
}
314317
}
315318
}

0 commit comments

Comments
 (0)