Skip to content

Commit 1274311

Browse files
authored
Merge pull request #11 from PastorGL/feature-release_3.8
Release 3.8
2 parents fe94c96 + 4a4f91c commit 1274311

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

52 files changed

+1897
-518
lines changed

cli/pom.xml

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,17 @@
144144
<version>3.23.0</version>
145145
</dependency>
146146

147+
<dependency>
148+
<groupId>io.logz</groupId>
149+
<artifactId>guice-jersey</artifactId>
150+
<version>1.1.05</version>
151+
</dependency>
152+
<dependency>
153+
<groupId>org.glassfish.jersey.media</groupId>
154+
<artifactId>jersey-media-json-jackson</artifactId>
155+
<version>2.36</version>
156+
</dependency>
157+
147158
<dependency>
148159
<groupId>org.apache.velocity</groupId>
149160
<artifactId>velocity-engine-core</artifactId>
@@ -277,6 +288,12 @@
277288
<include>com.uber</include>
278289
<include>io.jenetics</include>
279290
<include>de.undercouch:bson4jackson</include>
291+
<include>io.logz</include>
292+
<include>org.glassfish.hk2:guice-bridge</include>
293+
<include>org.eclipse.jetty</include>
294+
<include>com.google.inject</include>
295+
<include>org.glassfish.jersey.media:jersey-media-json-jackson</include>
296+
<include>org.glassfish.jersey.ext:jersey-entity-filtering</include>
280297
</includes>
281298
</artifactSet>
282299
</configuration>

cli/src/main/java/io/github/pastorgl/datacooker/cli/Configuration.java

Lines changed: 14 additions & 113 deletions
Original file line numberDiff line numberDiff line change
@@ -4,136 +4,37 @@
44
*/
55
package io.github.pastorgl.datacooker.cli;
66

7-
import io.github.pastorgl.datacooker.config.InvalidConfigurationException;
8-
import io.github.pastorgl.datacooker.scripting.VariablesContext;
97
import org.apache.commons.cli.*;
10-
import org.apache.hadoop.fs.Path;
11-
import org.apache.spark.api.java.JavaSparkContext;
12-
import scala.Tuple2;
138

14-
import java.io.StringReader;
15-
import java.util.*;
9+
import java.util.ListIterator;
1610

1711
public class Configuration {
1812
protected Options options;
1913
protected CommandLine commandLine;
2014

21-
private HelpFormatter hf = new HelpFormatter();
15+
private final HelpFormatter hf = new HelpFormatter();
2216

2317
public Configuration() {
2418
options = new Options();
2519
hf.setOptionComparator(null);
2620

27-
addOption("h", "help", false, "Print a list of command line options and exit");
28-
addOption("s", "script", true, "TDL4 script file");
29-
addOption("d", "dry", false, "Dry run: only check script syntax and print errors to console, if found");
21+
addOption("h", "help", false, "Print full list of command line options and exit");
22+
addOption("s", "script", true, "TDL4 script file. Mandatory for batch modes");
3023
addOption("v", "variablesFile", true, "Path to variables file, name=value pairs per each line");
3124
addOption("V", "variables", true, "Pass contents of variables file encoded as Base64");
32-
addOption("l", "local", false, "Run in local mode (its options have no effect otherwise)");
25+
addOption("l", "local", false, "Run in local batch mode (cluster batch mode otherwise)");
26+
addOption("d", "dry", false, "-l: Dry run (only check script syntax and print errors to console, if found)");
3327
addOption("m", "driverMemory", true, "-l: Driver memory, by default Spark uses 1g");
3428
addOption("u", "sparkUI", false, "-l: Enable Spark UI, by default it is disabled");
3529
addOption("L", "localCores", true, "-l: Set cores #, by default * (all cores)");
36-
addOption("R", "repl", false, "Run in local mode with interactive REPL interface. -s is optional");
37-
addOption("i", "history", true, "-R: Set history file location");
38-
}
39-
40-
public VariablesContext variables(JavaSparkContext context) throws Exception {
41-
StringBuilder variablesSource = new StringBuilder();
42-
if (hasOption("v")) {
43-
String variablesFile = getOptionValue("v");
44-
45-
Path sourcePath = new Path(variablesFile);
46-
String qualifiedPath = sourcePath.getFileSystem(context.hadoopConfiguration()).makeQualified(sourcePath).toString();
47-
48-
int lastSlash = variablesFile.lastIndexOf('/');
49-
variablesFile = (lastSlash < 0) ? variablesFile : variablesFile.substring(0, lastSlash);
50-
51-
variablesSource.append(context.wholeTextFiles(variablesFile)
52-
.filter(t -> t._1.equals(qualifiedPath))
53-
.map(Tuple2::_2)
54-
.first());
55-
}
56-
if (hasOption("V")) {
57-
variablesSource.append("\n");
58-
variablesSource.append(new String(Base64.getDecoder().decode(getOptionValue("V"))));
59-
}
60-
61-
Properties properties = new Properties();
62-
if (variablesSource.length() > 0) {
63-
properties.load(new StringReader(variablesSource.toString()));
64-
}
65-
66-
Map<String, Object> variables = new HashMap<>();
67-
for (Map.Entry e : properties.entrySet()) {
68-
String key = String.valueOf(e.getKey());
69-
Object v = e.getValue();
70-
String value = String.valueOf(v);
71-
72-
int last = value.length() - 1;
73-
if ((value.indexOf('[') == 0) && (value.lastIndexOf(']') == last)) {
74-
value = value.substring(1, last);
75-
76-
if (value.contains("'")) {
77-
boolean inString = false;
78-
List<String> strings = new ArrayList<>();
79-
StringBuilder cur = null;
80-
for (int i = 0, len = value.length(); i < len; i++) {
81-
char c = value.charAt(i);
82-
if (inString) {
83-
if (c != '\'') {
84-
cur.append(c);
85-
} else { // c == '
86-
if ((i + 1) < len) {
87-
if (value.charAt(i + 1) != '\'') {
88-
inString = false;
89-
strings.add(cur.toString());
90-
} else {
91-
cur.append("'");
92-
i++;
93-
}
94-
} else {
95-
strings.add(cur.toString());
96-
}
97-
}
98-
} else {
99-
if (c == '\'') {
100-
inString = true;
101-
cur = new StringBuilder();
102-
}
103-
}
104-
}
105-
106-
v = strings.toArray();
107-
} else {
108-
String[] vv = value.split(",");
109-
v = Arrays.stream(vv).map(vvv -> Double.parseDouble(vvv.trim())).toArray();
110-
}
111-
} else if ((value.indexOf('\'') == 0) && (value.lastIndexOf('\'') == last)) {
112-
v = value.substring(1, last);
113-
}
114-
variables.put(key, v);
115-
}
116-
117-
VariablesContext variablesContext = new VariablesContext();
118-
variablesContext.putAll(variables);
119-
return variablesContext;
120-
}
121-
122-
public String script(JavaSparkContext context, String sourceFile) {
123-
try {
124-
Path sourcePath = new Path(sourceFile);
125-
String qualifiedPath = sourcePath.getFileSystem(context.hadoopConfiguration()).makeQualified(sourcePath).toString();
126-
127-
int lastSlash = sourceFile.lastIndexOf('/');
128-
sourceFile = (lastSlash < 0) ? sourceFile : sourceFile.substring(0, lastSlash);
129-
130-
return context.wholeTextFiles(sourceFile)
131-
.filter(t -> t._1.equals(qualifiedPath))
132-
.map(Tuple2::_2)
133-
.first();
134-
} catch (Exception e) {
135-
throw new InvalidConfigurationException("Error while reading TDL4 script file");
136-
}
30+
addOption("R", "repl", false, "Run in local mode with interactive REPL interface. Implies -l. -s is optional");
31+
addOption("i", "history", true, "-R, -r: Set history file location");
32+
addOption("e", "serveRepl", false, "Start REPL server in local or cluster mode. -s is optional");
33+
addOption("r", "remoteRepl", false, "Connect to a remote REPL server");
34+
addOption("i", "host", true, "Use specified network address:\n" +
35+
"-e: to listen at (default is all)\n" +
36+
"-r: to connect to (in this case, mandatory parameter)");
37+
addOption("p", "port", true, "-e, -r: Use specified port to listen at or connect to. Default is 9595");
13738
}
13839

13940
public void addOption(String opt, String longOpt, boolean hasArg, String description) {
Lines changed: 161 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,161 @@
1+
/**
2+
* Copyright (C) 2023 Data Cooker Team and Contributors
3+
* This project uses New BSD license with do no evil clause. For full text, check the LICENSE file in the root directory.
4+
*/
5+
package io.github.pastorgl.datacooker.cli;
6+
7+
import io.github.pastorgl.datacooker.RegisteredPackages;
import io.github.pastorgl.datacooker.data.Transforms;
import io.github.pastorgl.datacooker.scripting.Operations;
import io.github.pastorgl.datacooker.scripting.VariablesContext;
import io.github.pastorgl.datacooker.storage.Adapters;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.fs.Path;
import org.apache.spark.api.java.JavaSparkContext;
import scala.Function1;
import scala.Tuple2;

import java.io.InputStream;
import java.io.StringReader;
import java.net.URL;
import java.util.*;
import java.util.function.Consumer;
import java.util.jar.Manifest;

import static io.github.pastorgl.datacooker.cli.Main.LOG;
25+
public class Helper {
26+
static public void populateEntities() {
27+
log(new String[]{
28+
RegisteredPackages.REGISTERED_PACKAGES.size() + " Registered Packages",
29+
Adapters.INPUTS.size() + " Input Adapters",
30+
Transforms.TRANSFORMS.size() + " Transforms",
31+
Operations.OPERATIONS.size() + " Operations",
32+
Adapters.OUTPUTS.size() + " Output Adapters"
33+
});
34+
}
35+
36+
static public void log(String[] msg, Object... err) {
37+
Function1<String, Void> lf = (err.length > 0)
38+
? (m) -> {
39+
LOG.error(m);
40+
return null;
41+
}
42+
: (m) -> {
43+
LOG.warn(m);
44+
return null;
45+
};
46+
int len = Arrays.stream(msg).map(String::length).max(Integer::compareTo).orElse(20);
47+
lf.apply(StringUtils.repeat("=", len));
48+
Arrays.stream(msg).forEach(lf::apply);
49+
lf.apply(StringUtils.repeat("=", len));
50+
}
51+
52+
public static String loadScript(String sourceFile, JavaSparkContext context) {
53+
try {
54+
Path sourcePath = new Path(sourceFile);
55+
String qualifiedPath = sourcePath.getFileSystem(context.hadoopConfiguration()).makeQualified(sourcePath).toString();
56+
57+
int lastSlash = sourceFile.lastIndexOf('/');
58+
sourceFile = (lastSlash < 0) ? sourceFile : sourceFile.substring(0, lastSlash);
59+
60+
return context.wholeTextFiles(sourceFile)
61+
.filter(t -> t._1.equals(qualifiedPath))
62+
.map(Tuple2::_2)
63+
.first();
64+
} catch (Exception e) {
65+
throw new RuntimeException("Error while reading TDL4 script file");
66+
}
67+
}
68+
69+
public static VariablesContext loadVariables(Configuration config, JavaSparkContext context) throws Exception {
70+
StringBuilder variablesSource = new StringBuilder();
71+
if (config.hasOption("v")) {
72+
String variablesFile = config.getOptionValue("v");
73+
74+
Path sourcePath = new Path(variablesFile);
75+
String qualifiedPath = sourcePath.getFileSystem(context.hadoopConfiguration()).makeQualified(sourcePath).toString();
76+
77+
int lastSlash = variablesFile.lastIndexOf('/');
78+
variablesFile = (lastSlash < 0) ? variablesFile : variablesFile.substring(0, lastSlash);
79+
80+
variablesSource.append(context.wholeTextFiles(variablesFile)
81+
.filter(t -> t._1.equals(qualifiedPath))
82+
.map(Tuple2::_2)
83+
.first());
84+
}
85+
if (config.hasOption("V")) {
86+
variablesSource.append("\n");
87+
variablesSource.append(new String(Base64.getDecoder().decode(config.getOptionValue("V"))));
88+
}
89+
90+
Properties properties = new Properties();
91+
if (variablesSource.length() > 0) {
92+
properties.load(new StringReader(variablesSource.toString()));
93+
}
94+
95+
Map<String, Object> variables = new HashMap<>();
96+
for (Map.Entry e : properties.entrySet()) {
97+
String key = String.valueOf(e.getKey());
98+
Object v = e.getValue();
99+
String value = String.valueOf(v);
100+
101+
int last = value.length() - 1;
102+
if ((value.indexOf('[') == 0) && (value.lastIndexOf(']') == last)) {
103+
value = value.substring(1, last);
104+
105+
if (value.contains("'")) {
106+
boolean inString = false;
107+
List<String> strings = new ArrayList<>();
108+
StringBuilder cur = null;
109+
for (int i = 0, len = value.length(); i < len; i++) {
110+
char c = value.charAt(i);
111+
if (inString) {
112+
if (c != '\'') {
113+
cur.append(c);
114+
} else { // c == '
115+
if ((i + 1) < len) {
116+
if (value.charAt(i + 1) != '\'') {
117+
inString = false;
118+
strings.add(cur.toString());
119+
} else {
120+
cur.append("'");
121+
i++;
122+
}
123+
} else {
124+
strings.add(cur.toString());
125+
}
126+
}
127+
} else {
128+
if (c == '\'') {
129+
inString = true;
130+
cur = new StringBuilder();
131+
}
132+
}
133+
}
134+
135+
v = strings.toArray();
136+
} else {
137+
String[] vv = value.split(",");
138+
v = Arrays.stream(vv).map(vvv -> Double.parseDouble(vvv.trim())).toArray();
139+
}
140+
} else if ((value.indexOf('\'') == 0) && (value.lastIndexOf('\'') == last)) {
141+
v = value.substring(1, last);
142+
}
143+
variables.put(key, v);
144+
}
145+
146+
VariablesContext variablesContext = new VariablesContext();
147+
variablesContext.putAll(variables);
148+
return variablesContext;
149+
}
150+
151+
public static String getVersion() {
152+
try {
153+
URL url = Main.class.getClassLoader().getResource("META-INF/MANIFEST.MF");
154+
Manifest man = new Manifest(url.openStream());
155+
156+
return man.getMainAttributes().getValue("Implementation-Version");
157+
} catch (Exception ignore) {
158+
return "unknown";
159+
}
160+
}
161+
}

0 commit comments

Comments
 (0)