Skip to content

Commit 54eec0f

Browse files
author
Bruce An
committed
Merge branch 'develop' of https://github.com/marklogic/marklogic-data-hub into bugfix/dhfprod-2407
2 parents fd55c38 + 7340362 commit 54eec0f

File tree

40 files changed

+2638
-58
lines changed

40 files changed

+2638
-58
lines changed
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
This DHF 5 project shows an example of a simple flow with one ingestion step that utilizes a custom hook. The custom
2+
hook provides a chance for logic that potentially may perform updates to run outside of the main DHF transaction, thus
3+
ensuring that transaction can remain read-only.
4+
5+
The custom hook is defined at src/main/ml-modules/root/custom-modules/ingestion/IngestOrders/archive-hook.sjs, and it
6+
is associated with the flow via flows/LoadOrders.flow.json.
7+
8+
To see the hook in action, you can either use QuickStart to deploy this application and run the "LoadOrders" flow at
9+
least twice, or you can run the following Gradle tasks to deploy the application and run the flow twice:
10+
11+
./gradlew mlDeploy
12+
./gradlew loadOrders
13+
./gradlew loadOrders
14+
15+
In either case, when the flow is run the second time, the order documents created during the first run will be
16+
archived - which in this simple example means being written to a different URI in an archive collection.
17+
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
buildscript {
2+
repositories {
3+
mavenCentral()
4+
jcenter()
5+
mavenLocal()
6+
}
7+
dependencies {
8+
classpath("com.marklogic:ml-data-hub:${mlDHFVersion}")
9+
}
10+
}
11+
plugins {
12+
id 'net.saliman.properties' version '1.4.6'
13+
}
14+
15+
apply plugin: 'com.marklogic.ml-data-hub'
16+
17+
repositories {
18+
jcenter()
19+
maven { url "http://developer.marklogic.com/maven2/" }
20+
}
21+
22+
configurations {
23+
mlcp
24+
}
25+
26+
dependencies {
27+
mlcp "com.marklogic:mlcp:9.0.9"
28+
mlcp files("lib")
29+
}
30+
31+
task loadOrders(type: com.marklogic.gradle.task.MlcpTask) {
32+
description = "Example of executing a flow via mlcp instead of via QuickStart"
33+
classpath = configurations.mlcp
34+
command = "IMPORT"
35+
port = Integer.parseInt(mlStagingPort)
36+
input_file_path = "input/orders"
37+
input_file_type = "delimited_text"
38+
document_type = "json"
39+
transform_module = "/data-hub/5/transforms/mlcp-flow-transform.sjs"
40+
transform_param = "flow-name=LoadOrders"
41+
output_collections = "IngestOrders"
42+
output_permissions = "rest-reader,read,rest-writer,update"
43+
output_uri_suffix = ".json"
44+
output_uri_replace = ".*/input,''"
45+
generate_uri = true
46+
thread_count = 24
47+
}
48+
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
{
2+
"info" : {
3+
"title" : "Order",
4+
"version" : "0.0.1"
5+
},
6+
"definitions" : {
7+
"Order" : {
8+
"required" : [ ],
9+
"pii" : [ ],
10+
"elementRangeIndex" : [ ],
11+
"rangeIndex" : [ ],
12+
"wordLexicon" : [ ],
13+
"properties" : { }
14+
}
15+
}
16+
}
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
{
2+
"name" : "LoadOrders",
3+
"description" : "",
4+
"batchSize" : 100,
5+
"threadCount" : 4,
6+
"stopOnError" : false,
7+
"options" : { },
8+
"version" : 0,
9+
"steps" : {
10+
"1" : {
11+
"name" : "IngestOrders",
12+
"description" : "",
13+
"options" : {
14+
"sourceQuery" : "",
15+
"collections" : [ "IngestOrders" ],
16+
"permissions" : "rest-reader,read,rest-writer,update",
17+
"outputFormat" : "json",
18+
"targetDatabase" : "data-hub-STAGING"
19+
},
20+
"customHook" : {
21+
"module" : "/custom-modules/ingestion/IngestOrders/archive-hook.sjs",
22+
"parameters" : {
23+
"archiveCollection" : "Archive"
24+
},
25+
"user" : "flow-operator",
26+
"runBefore" : false
27+
},
28+
"retryLimit" : null,
29+
"batchSize" : null,
30+
"threadCount" : null,
31+
"stepDefinitionName" : "default-ingestion",
32+
"stepDefinitionType" : "INGESTION",
33+
"fileLocations" : {
34+
"inputFilePath" : "input/orders",
35+
"inputFileType" : "csv",
36+
"outputURIReplacement" : ".*/input,''"
37+
}
38+
}
39+
}
40+
}
53.9 KB
Binary file not shown.
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
distributionBase=GRADLE_USER_HOME
2+
distributionPath=wrapper/dists
3+
distributionUrl=https\://services.gradle.org/distributions/gradle-5.2.1-bin.zip
4+
zipStoreBase=GRADLE_USER_HOME
5+
zipStorePath=wrapper/dists

examples/dhf5-custom-hook/gradlew

Lines changed: 172 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,172 @@
1+
#!/usr/bin/env sh
2+
3+
##############################################################################
4+
##
5+
## Gradle start up script for UN*X
6+
##
7+
##############################################################################
8+
9+
# Attempt to set APP_HOME
10+
# Resolve links: $0 may be a link
11+
PRG="$0"
12+
# Need this for relative symlinks.
13+
while [ -h "$PRG" ] ; do
14+
ls=`ls -ld "$PRG"`
15+
link=`expr "$ls" : '.*-> \(.*\)$'`
16+
if expr "$link" : '/.*' > /dev/null; then
17+
PRG="$link"
18+
else
19+
PRG=`dirname "$PRG"`"/$link"
20+
fi
21+
done
22+
SAVED="`pwd`"
23+
cd "`dirname \"$PRG\"`/" >/dev/null
24+
APP_HOME="`pwd -P`"
25+
cd "$SAVED" >/dev/null
26+
27+
APP_NAME="Gradle"
28+
APP_BASE_NAME=`basename "$0"`
29+
30+
# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
31+
DEFAULT_JVM_OPTS=""
32+
33+
# Use the maximum available, or set MAX_FD != -1 to use that value.
34+
MAX_FD="maximum"
35+
36+
warn ( ) {
37+
echo "$*"
38+
}
39+
40+
die ( ) {
41+
echo
42+
echo "$*"
43+
echo
44+
exit 1
45+
}
46+
47+
# OS specific support (must be 'true' or 'false').
48+
cygwin=false
49+
msys=false
50+
darwin=false
51+
nonstop=false
52+
case "`uname`" in
53+
CYGWIN* )
54+
cygwin=true
55+
;;
56+
Darwin* )
57+
darwin=true
58+
;;
59+
MINGW* )
60+
msys=true
61+
;;
62+
NONSTOP* )
63+
nonstop=true
64+
;;
65+
esac
66+
67+
CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
68+
69+
# Determine the Java command to use to start the JVM.
70+
if [ -n "$JAVA_HOME" ] ; then
71+
if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
72+
# IBM's JDK on AIX uses strange locations for the executables
73+
JAVACMD="$JAVA_HOME/jre/sh/java"
74+
else
75+
JAVACMD="$JAVA_HOME/bin/java"
76+
fi
77+
if [ ! -x "$JAVACMD" ] ; then
78+
die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
79+
80+
Please set the JAVA_HOME variable in your environment to match the
81+
location of your Java installation."
82+
fi
83+
else
84+
JAVACMD="java"
85+
which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
86+
87+
Please set the JAVA_HOME variable in your environment to match the
88+
location of your Java installation."
89+
fi
90+
91+
# Increase the maximum file descriptors if we can.
92+
if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then
93+
MAX_FD_LIMIT=`ulimit -H -n`
94+
if [ $? -eq 0 ] ; then
95+
if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
96+
MAX_FD="$MAX_FD_LIMIT"
97+
fi
98+
ulimit -n $MAX_FD
99+
if [ $? -ne 0 ] ; then
100+
warn "Could not set maximum file descriptor limit: $MAX_FD"
101+
fi
102+
else
103+
warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
104+
fi
105+
fi
106+
107+
# For Darwin, add options to specify how the application appears in the dock
108+
if $darwin; then
109+
GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
110+
fi
111+
112+
# For Cygwin, switch paths to Windows format before running java
113+
if $cygwin ; then
114+
APP_HOME=`cygpath --path --mixed "$APP_HOME"`
115+
CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`
116+
JAVACMD=`cygpath --unix "$JAVACMD"`
117+
118+
# We build the pattern for arguments to be converted via cygpath
119+
ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
120+
SEP=""
121+
for dir in $ROOTDIRSRAW ; do
122+
ROOTDIRS="$ROOTDIRS$SEP$dir"
123+
SEP="|"
124+
done
125+
OURCYGPATTERN="(^($ROOTDIRS))"
126+
# Add a user-defined pattern to the cygpath arguments
127+
if [ "$GRADLE_CYGPATTERN" != "" ] ; then
128+
OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
129+
fi
130+
# Now convert the arguments - kludge to limit ourselves to /bin/sh
131+
i=0
132+
for arg in "$@" ; do
133+
CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
134+
CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option
135+
136+
if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition
137+
eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
138+
else
139+
eval `echo args$i`="\"$arg\""
140+
fi
141+
i=$((i+1))
142+
done
143+
case $i in
144+
(0) set -- ;;
145+
(1) set -- "$args0" ;;
146+
(2) set -- "$args0" "$args1" ;;
147+
(3) set -- "$args0" "$args1" "$args2" ;;
148+
(4) set -- "$args0" "$args1" "$args2" "$args3" ;;
149+
(5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
150+
(6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
151+
(7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
152+
(8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
153+
(9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
154+
esac
155+
fi
156+
157+
# Escape application args
158+
save ( ) {
159+
for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done
160+
echo " "
161+
}
162+
APP_ARGS=$(save "$@")
163+
164+
# Collect all arguments for the java command, following the shell quoting and substitution rules
165+
eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS"
166+
167+
# by default we should be in the correct project dir, but when run from Finder on Mac, the cwd is wrong
168+
if [ "$(uname)" = "Darwin" ] && [ "$HOME" = "$PWD" ]; then
169+
cd "$(dirname "$0")"
170+
fi
171+
172+
exec "$JAVACMD" "$@"
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
@if "%DEBUG%" == "" @echo off
2+
@rem ##########################################################################
3+
@rem
4+
@rem Gradle startup script for Windows
5+
@rem
6+
@rem ##########################################################################
7+
8+
@rem Set local scope for the variables with windows NT shell
9+
if "%OS%"=="Windows_NT" setlocal
10+
11+
set DIRNAME=%~dp0
12+
if "%DIRNAME%" == "" set DIRNAME=.
13+
set APP_BASE_NAME=%~n0
14+
set APP_HOME=%DIRNAME%
15+
16+
@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
17+
set DEFAULT_JVM_OPTS=
18+
19+
@rem Find java.exe
20+
if defined JAVA_HOME goto findJavaFromJavaHome
21+
22+
set JAVA_EXE=java.exe
23+
%JAVA_EXE% -version >NUL 2>&1
24+
if "%ERRORLEVEL%" == "0" goto init
25+
26+
echo.
27+
echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
28+
echo.
29+
echo Please set the JAVA_HOME variable in your environment to match the
30+
echo location of your Java installation.
31+
32+
goto fail
33+
34+
:findJavaFromJavaHome
35+
set JAVA_HOME=%JAVA_HOME:"=%
36+
set JAVA_EXE=%JAVA_HOME%/bin/java.exe
37+
38+
if exist "%JAVA_EXE%" goto init
39+
40+
echo.
41+
echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
42+
echo.
43+
echo Please set the JAVA_HOME variable in your environment to match the
44+
echo location of your Java installation.
45+
46+
goto fail
47+
48+
:init
49+
@rem Get command-line arguments, handling Windows variants
50+
51+
if not "%OS%" == "Windows_NT" goto win9xME_args
52+
53+
:win9xME_args
54+
@rem Slurp the command line arguments.
55+
set CMD_LINE_ARGS=
56+
set _SKIP=2
57+
58+
:win9xME_args_slurp
59+
if "x%~1" == "x" goto execute
60+
61+
set CMD_LINE_ARGS=%*
62+
63+
:execute
64+
@rem Setup the command line
65+
66+
set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
67+
68+
@rem Execute Gradle
69+
"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS%
70+
71+
:end
72+
@rem End local scope for the variables with windows NT shell
73+
if "%ERRORLEVEL%"=="0" goto mainEnd
74+
75+
:fail
76+
rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
77+
rem the _cmd.exe /c_ return code!
78+
if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
79+
exit /b 1
80+
81+
:mainEnd
82+
if "%OS%"=="Windows_NT" endlocal
83+
84+
:omega

0 commit comments

Comments
 (0)