Skip to content

Commit 09931bc

Browse files
committed
added support for running MLCP via a dependency on the MLCP jar instead of invoking the mlcp executable
1 parent af32312 commit 09931bc

File tree

29 files changed

+1993
-72
lines changed

29 files changed

+1993
-72
lines changed

.gitignore

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,14 @@
11
.DS_Store
22
.gradle/
33
.settings/
4-
bin/
4+
data-hub/bin/
5+
quick-start/bin/
56
build/
67
releases/
78
.classpath
89
.project
910
gradle-local.properties
1011
/quick-start/environment.properties
12+
/quick-start/assetInstallTime.json
13+
/quick-start/input
14+
/quick-start/plugins

data-hub/build.gradle

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,11 @@ dependencies {
2626
compile 'com.marklogic:ml-app-deployer:2.0'
2727
compile 'commons-io:commons-io:2.4'
2828
compile 'com.google.code.gson:gson:2.6.1'
29+
compile("com.marklogic:mlcp:8.0-4") {
30+
exclude module : 'servlet-api'
31+
exclude group: 'com.google.guava', module: 'guava'
32+
}
33+
compile 'com.google.guava:guava:11.0.2'
2934
testCompile 'junit:junit:4.12'
3035
testCompile 'xmlunit:xmlunit:1.3'
3136
testCompile 'org.hamcrest:hamcrest-junit:2.0.0.0'
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
package com.marklogic.hub;
2+
3+
import java.io.File;
4+
import java.util.List;
5+
6+
import com.marklogic.contentpump.ContentPump;
7+
import com.marklogic.contentpump.utilities.OptionsFileUtil;
8+
9+
public class DataHubContentPump extends ContentPump {
10+
11+
private String[] arguments;
12+
13+
public DataHubContentPump(List<String> arguments) {
14+
this(arguments.toArray(new String[0]));
15+
}
16+
17+
public DataHubContentPump(String[] arguments) {
18+
this.arguments = arguments;
19+
}
20+
21+
/**
22+
* Run the Content Pump.
23+
*
24+
* @return true if the content pump executed successfully, false otherwise.
25+
*/
26+
public boolean execute() {
27+
String[] expandedArgs = null;
28+
int rc = 1;
29+
try {
30+
expandedArgs = OptionsFileUtil.expandArguments(arguments);
31+
rc = runCommand(expandedArgs);
32+
} catch (Exception ex) {
33+
LOG.error("Error while expanding arguments", ex);
34+
System.err.println(ex.getMessage());
35+
System.err.println("Try 'mlcp help' for usage.");
36+
}
37+
38+
return rc == 0;
39+
}
40+
}

data-hub/src/main/java/com/marklogic/hub/Mlcp.java

Lines changed: 17 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -23,16 +23,13 @@
2323
import org.slf4j.Logger;
2424
import org.slf4j.LoggerFactory;
2525

26-
import com.marklogic.hub.util.IOUtil;
27-
import com.marklogic.hub.util.IOUtil.LogLevel;
28-
2926
public class Mlcp {
3027
private static final Logger LOGGER = LoggerFactory.getLogger(Mlcp.class);
3128

29+
private final static String DEFAULT_HADOOP_HOME_DIR= "./hadoop/";
30+
3231
private List<MlcpSource> sources = new ArrayList<>();
3332

34-
private String mlcpPath;
35-
3633
private String host;
3734

3835
private String port;
@@ -41,21 +38,13 @@ public class Mlcp {
4138

4239
private String password;
4340

44-
public Mlcp(String mlcpHome, String host, String port, String user, String password) {
41+
public Mlcp(String host, String port, String user, String password) throws IOException {
4542
this.host = host;
4643
this.port = port;
4744
this.user = user;
4845
this.password = password;
49-
50-
// set the mlcp executable path based on OS
51-
this.mlcpPath = mlcpHome;
52-
String osName = System.getProperty("os.name");
53-
if (osName != null && osName.toLowerCase().startsWith("windows")) {
54-
mlcpPath += "/bin/mlcp.bat";
55-
}
56-
else {
57-
mlcpPath += "/bin/mlcp.sh";
58-
}
46+
47+
setHadoopHomeDir();
5948
}
6049

6150
public void addSourceDirectory(String directoryPath, SourceOptions options) {
@@ -70,7 +59,7 @@ public void loadContent() {
7059
try {
7160
List<String> arguments = new ArrayList<>();
7261

73-
arguments.add(mlcpPath);
62+
// arguments.add(mlcpPath);
7463
arguments.add("import");
7564
arguments.add("-mode");
7665
arguments.add("local");
@@ -86,17 +75,9 @@ public void loadContent() {
8675
// add arguments related to the source
8776
List<String> sourceArguments = source.getMlcpArguments();
8877
arguments.addAll(sourceArguments);
89-
90-
ProcessBuilder pb = new ProcessBuilder(arguments.toArray(new String[0]));
91-
Process process = pb.start();
92-
93-
inputThread = IOUtil.createInputStreamSink(process.getInputStream(), LOGGER, LogLevel.DEBUG);
94-
errorThread = IOUtil.createInputStreamSink(process.getErrorStream(), LOGGER, LogLevel.ERROR);
95-
96-
inputThread.start();
97-
errorThread.start();
98-
99-
process.waitFor();
78+
79+
DataHubContentPump contentPump = new DataHubContentPump(arguments);
80+
contentPump.execute();
10081
}
10182
catch (Exception e) {
10283
LOGGER.error("Failed to load {}", source.getSourcePath(), e);
@@ -111,6 +92,14 @@ public void loadContent() {
11192
}
11293
}
11394
}
95+
96+
protected void setHadoopHomeDir() throws IOException {
97+
String home = System.getProperty("hadoop.home.dir");
98+
if (home == null) {
99+
home = DEFAULT_HADOOP_HOME_DIR;
100+
}
101+
System.setProperty("hadoop.home.dir", new File(home).getCanonicalPath());
102+
}
114103

115104
private static class MlcpSource {
116105
private String sourcePath;

quick-start/hadoop/bin/hadoop

Lines changed: 147 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,147 @@
1+
#!/usr/bin/env bash

# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# This script runs the hadoop core commands.

# Resolve the directory this script lives in so libexec can be found
# relative to it, even when invoked through a symlink on PATH.
bin=`which $0`
bin=`dirname ${bin}`
bin=`cd "$bin"; pwd`

DEFAULT_LIBEXEC_DIR="$bin"/../libexec
HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
. $HADOOP_LIBEXEC_DIR/hadoop-config.sh

function print_usage(){
  echo "Usage: hadoop [--config confdir] COMMAND"
  echo "       where COMMAND is one of:"
  echo "  fs                   run a generic filesystem user client"
  echo "  version              print the version"
  echo "  jar <jar>            run a jar file"
  echo "  checknative [-a|-h]  check native hadoop and compression libraries availability"
  echo "  distcp <srcurl> <desturl> copy file or directories recursively"
  echo "  archive -archiveName NAME -p <parent path> <src>* <dest> create a hadoop archive"
  echo "  classpath            prints the class path needed to get the"
  echo "  credential           interact with credential providers"
  echo "                       Hadoop jar and the required libraries"
  echo "  daemonlog            get/set the log level for each daemon"
  echo "  trace                view and modify Hadoop tracing settings"
  echo " or"
  echo "  CLASSNAME            run the class named CLASSNAME"
  echo ""
  echo "Most commands print help when invoked w/o parameters."
}

if [ $# = 0 ]; then
  print_usage
  exit
fi

COMMAND=$1
case $COMMAND in
  # usage flags
  --help|-help|-h)
    print_usage
    exit
    ;;

  #hdfs commands
  namenode|secondarynamenode|datanode|dfs|dfsadmin|fsck|balancer|fetchdt|oiv|dfsgroups|portmap|nfs3)
    echo "DEPRECATED: Use of this script to execute hdfs command is deprecated." 1>&2
    echo "Instead use the hdfs command for it." 1>&2
    echo "" 1>&2
    #try to locate hdfs and if present, delegate to it.
    shift
    if [ -f "${HADOOP_HDFS_HOME}"/bin/hdfs ]; then
      exec "${HADOOP_HDFS_HOME}"/bin/hdfs ${COMMAND/dfsgroups/groups} "$@"
    elif [ -f "${HADOOP_PREFIX}"/bin/hdfs ]; then
      exec "${HADOOP_PREFIX}"/bin/hdfs ${COMMAND/dfsgroups/groups} "$@"
    else
      echo "HADOOP_HDFS_HOME not found!"
      exit 1
    fi
    ;;

  #mapred commands for backwards compatibility
  pipes|job|queue|mrgroups|mradmin|jobtracker|tasktracker)
    echo "DEPRECATED: Use of this script to execute mapred command is deprecated." 1>&2
    echo "Instead use the mapred command for it." 1>&2
    echo "" 1>&2
    #try to locate mapred and if present, delegate to it.
    shift
    if [ -f "${HADOOP_MAPRED_HOME}"/bin/mapred ]; then
      exec "${HADOOP_MAPRED_HOME}"/bin/mapred ${COMMAND/mrgroups/groups} "$@"
    elif [ -f "${HADOOP_PREFIX}"/bin/mapred ]; then
      exec "${HADOOP_PREFIX}"/bin/mapred ${COMMAND/mrgroups/groups} "$@"
    else
      echo "HADOOP_MAPRED_HOME not found!"
      exit 1
    fi
    ;;

  #core commands
  *)
    # the core commands: map the command word to its driver class,
    # then fall through to the shared JVM launch below.
    if [ "$COMMAND" = "fs" ] ; then
      CLASS=org.apache.hadoop.fs.FsShell
    elif [ "$COMMAND" = "version" ] ; then
      CLASS=org.apache.hadoop.util.VersionInfo
    elif [ "$COMMAND" = "jar" ] ; then
      CLASS=org.apache.hadoop.util.RunJar
    elif [ "$COMMAND" = "key" ] ; then
      CLASS=org.apache.hadoop.crypto.key.KeyShell
    elif [ "$COMMAND" = "checknative" ] ; then
      CLASS=org.apache.hadoop.util.NativeLibraryChecker
    elif [ "$COMMAND" = "distcp" ] ; then
      CLASS=org.apache.hadoop.tools.DistCp
      CLASSPATH=${CLASSPATH}:${TOOL_PATH}
    elif [ "$COMMAND" = "daemonlog" ] ; then
      CLASS=org.apache.hadoop.log.LogLevel
    elif [ "$COMMAND" = "archive" ] ; then
      CLASS=org.apache.hadoop.tools.HadoopArchives
      CLASSPATH=${CLASSPATH}:${TOOL_PATH}
    elif [ "$COMMAND" = "credential" ] ; then
      CLASS=org.apache.hadoop.security.alias.CredentialShell
    elif [ "$COMMAND" = "trace" ] ; then
      CLASS=org.apache.hadoop.tracing.TraceAdmin
    elif [ "$COMMAND" = "classpath" ] ; then
      if [ "$#" -eq 1 ]; then
        # No need to bother starting up a JVM for this simple case.
        echo $CLASSPATH
        exit
      else
        CLASS=org.apache.hadoop.util.Classpath
      fi
    elif [[ "$COMMAND" = -* ]] ; then
      # class and package names cannot begin with a -
      echo "Error: No command named \`$COMMAND' was found. Perhaps you meant \`hadoop ${COMMAND#-}'"
      exit 1
    else
      CLASS=$COMMAND
    fi
    shift

    # Always respect HADOOP_OPTS and HADOOP_CLIENT_OPTS
    HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"

    #make sure security appender is turned off
    HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,NullAppender}"

    export CLASSPATH=$CLASSPATH
    exec "$JAVA" $JAVA_HEAP_MAX $HADOOP_OPTS $CLASS "$@"
    ;;

esac

0 commit comments

Comments
 (0)