diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0f19880f0..15f4bb911 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,17 @@
# Changelog
+## 6.4.2 (09/19/2022)
+
+### Bug Fixes:
+
+- [#870](https://github.com/greenplum-db/pxf/pull/870) Relax the requirement that C string length matches Java string length
+
+## 6.4.1 (09/16/2022)
+
+### Bug Fixes:
+
+- [#858](https://github.com/greenplum-db/pxf/pull/858) Add JsonProtocolHandler to use HdfsFileFragmenter for multi-line JSON
+
## 6.4.0 (08/15/2022)
### Enhancements:
diff --git a/automation/pom.xml b/automation/pom.xml
index a87397484..2b4fba4f2 100644
--- a/automation/pom.xml
+++ b/automation/pom.xml
@@ -231,12 +231,6 @@
1.10.11
-
- org.jsoup
- jsoup
- 1.14.2
-
-
com.amazonaws
diff --git a/automation/src/main/java/org/greenplum/pxf/automation/components/cluster/PhdCluster.java b/automation/src/main/java/org/greenplum/pxf/automation/components/cluster/PhdCluster.java
index c5e4b3be8..c67e4347b 100755
--- a/automation/src/main/java/org/greenplum/pxf/automation/components/cluster/PhdCluster.java
+++ b/automation/src/main/java/org/greenplum/pxf/automation/components/cluster/PhdCluster.java
@@ -79,7 +79,8 @@ public void init() throws Exception {
public void addPathToPxfClassPath(String path) throws Exception {
String content = "export PXF_LOADER_PATH=file:" + path;
// path to local fetch pxf class file
- File pathToLocalClassPathFile = new File(getPxfBase() + "/conf", getPxfConfigurationFile());
+
+ File pathToLocalClassPathFile = new File(getPathToLocalPxfConfDirectory(), getPxfConfigurationFile());
ReportUtils.report(report, getClass(), "Add " + content + " to PXF class path (" + pathToLocalClassPathFile.getAbsolutePath() + ")");
// read file content
String pxfClasspathContent = new String(Files.readAllBytes(Paths.get(pathToLocalClassPathFile.getAbsolutePath())));
diff --git a/automation/src/main/java/org/greenplum/pxf/automation/components/cluster/installer/nodes/InstallationNode.java b/automation/src/main/java/org/greenplum/pxf/automation/components/cluster/installer/nodes/InstallationNode.java
deleted file mode 100755
index 6b22ab132..000000000
--- a/automation/src/main/java/org/greenplum/pxf/automation/components/cluster/installer/nodes/InstallationNode.java
+++ /dev/null
@@ -1,93 +0,0 @@
-package org.greenplum.pxf.automation.components.cluster.installer.nodes;
-
-import java.io.IOException;
-import java.util.Collections;
-import java.util.Comparator;
-
-import org.apache.commons.configuration.ConfigurationException;
-import org.apache.commons.lang.StringUtils;
-import org.jsoup.Jsoup;
-import org.jsoup.nodes.Document;
-import org.jsoup.nodes.Element;
-import org.jsoup.select.Elements;
-
-import org.greenplum.pxf.automation.components.common.cli.ShellCommandErrorException;
-
-/**
- * Represents the node that running the installation of the cluster
- */
-public class InstallationNode extends Node {
- // server for downloading the required builds
- private String downloadServer = "http://dist.dh.greenplum.com/dist/PHD/testing/";
- // path to resources in the installation node
- private String resourceDirectory = "src/test/resources/templates/";
-
- /**
- * Get the latest build for given buildName
- *
- * @param buildName required build type
- * @return latest build from given buildName
- * @throws IOException
- * @throws ShellCommandErrorException
- * @throws ConfigurationException
- */
- private String getLatestBuilds(final String buildName) throws IOException, ShellCommandErrorException, ConfigurationException {
- // connect to server and get list of all files
- Document doc = Jsoup.connect(downloadServer).get();
- // select only the match files according to buildName
- Elements el = doc.select("a[href]:matches(" + buildName + "-\\d+)");
-
- // sort files elements
- Collections.sort(el, new Comparator() {
- @Override
- public int compare(Element e1, Element e2) {
- // leave only the build number X (1.2.0.1-X) and compare it as integer
- return Integer.valueOf(e1.text().replaceAll(buildName + "-", "").replaceAll(".tar.gz", "")).compareTo(Integer.valueOf(e2.text()
- .replaceAll(buildName + "-", "")
- .replaceAll(".tar.gz", "")));
- }
- });
-
- // return latest build name
- return el.last().text().replaceAll(".tar.gz", "");
- }
-
- /**
- * Get the required version for given buildPattern: if a fixed version is given than return it,
- * "--" mean that the latest from the given version will be returned.
- *
- * @param buildPattern required build and version
- * @return required build according to given buildPattern
- * @throws ConfigurationException
- * @throws IOException
- * @throws ShellCommandErrorException
- */
- public String getRequiredVersion(String buildPattern) throws ConfigurationException, IOException, ShellCommandErrorException {
- // if buildPattern is empty return null
- if (!StringUtils.isEmpty(buildPattern)) {
- // if ends with "-"return the latest from required version, else return fixed build
- if (buildPattern.endsWith("-")) {
- return getLatestBuilds(buildPattern.substring(0, buildPattern.length() - 1));
- } else {
- return buildPattern;
- }
- }
- return null;
- }
-
- public String getDownloadServer() {
- return downloadServer;
- }
-
- public void setDownloadServer(String downloadServer) {
- this.downloadServer = downloadServer;
- }
-
- public String getResourceDirectory() {
- return resourceDirectory;
- }
-
- public void setResourceDirectory(String resourceDirectory) {
- this.resourceDirectory = resourceDirectory;
- }
-}
\ No newline at end of file
diff --git a/automation/src/test/java/org/greenplum/pxf/automation/features/orc/OrcReadTest.java b/automation/src/test/java/org/greenplum/pxf/automation/features/orc/OrcReadTest.java
index 1e0feed3f..de41d36bf 100644
--- a/automation/src/test/java/org/greenplum/pxf/automation/features/orc/OrcReadTest.java
+++ b/automation/src/test/java/org/greenplum/pxf/automation/features/orc/OrcReadTest.java
@@ -13,6 +13,7 @@ public class OrcReadTest extends BaseFeature {
private static final String ORC_PRIMITIVE_TYPES_UNORDERED_SUBSET = "orc_types_unordered_subset.orc";
private static final String ORC_LIST_TYPES = "orc_list_types.orc";
private static final String ORC_MULTIDIM_LIST_TYPES = "orc_multidim_list_types.orc";
+ private static final String ORC_NULL_IN_STRING = "orc_null_in_string.orc";
private static final String[] ORC_TABLE_COLUMNS = {
"id integer",
@@ -73,6 +74,12 @@ public class OrcReadTest extends BaseFeature {
"varchar_arr text[]"
};
+ private static final String[] ORC_NULL_IN_STRING_COLUMNS = new String[]{
+ "id int",
+ "context text",
+ "value text"
+ };
+
private String hdfsPath;
private ProtocolEnum protocol;
@@ -87,6 +94,7 @@ public void beforeClass() throws Exception {
hdfs.copyFromLocal(resourcePath + ORC_PRIMITIVE_TYPES_UNORDERED_SUBSET, hdfsPath + ORC_PRIMITIVE_TYPES_UNORDERED_SUBSET);
hdfs.copyFromLocal(resourcePath + ORC_LIST_TYPES, hdfsPath + ORC_LIST_TYPES);
hdfs.copyFromLocal(resourcePath + ORC_MULTIDIM_LIST_TYPES, hdfsPath + ORC_MULTIDIM_LIST_TYPES);
+ hdfs.copyFromLocal(resourcePath + ORC_NULL_IN_STRING, hdfsPath + ORC_NULL_IN_STRING);
prepareReadableExternalTable(PXF_ORC_TABLE, ORC_TABLE_COLUMNS, hdfsPath + ORC_PRIMITIVE_TYPES);
}
@@ -146,6 +154,12 @@ public void orcReadMultiDimensionalLists() throws Exception {
runTincTest("pxf.features.orc.read.multidim_list_types.runTest");
}
+ @Test(groups = {"features", "gpdb", "security", "hcfs"})
+ public void orcReadStringsContainingNullByte() throws Exception {
+ prepareReadableExternalTable("pxf_orc_null_in_string", ORC_NULL_IN_STRING_COLUMNS, hdfsPath + ORC_NULL_IN_STRING);
+ runTincTest("pxf.features.orc.read.null_in_string.runTest");
+ }
+
private void prepareReadableExternalTable(String name, String[] fields, String path) throws Exception {
prepareReadableExternalTable(name, fields, path, false);
}
diff --git a/automation/src/test/resources/data/orc/README.md b/automation/src/test/resources/data/orc/README.md
index 62e5ceb60..0b143878e 100644
--- a/automation/src/test/resources/data/orc/README.md
+++ b/automation/src/test/resources/data/orc/README.md
@@ -103,3 +103,10 @@ If desired, you can copy down the CSV file by running the following command:
mv "./orc_multidim_list_types/000000_0" "./${CSV_FILENAME}"
```
+## Generate the orc_null_in_string.orc file
+
+To generate this file, download a copy of the ORC Tools uber jar and use its `convert` command to turn `orc_null_in_string.json` into an ORC file:
+
+```shell
+java -jar orc-tools-1.8.0-uber.jar convert --schema 'struct<id:int,context:string,value:string>' --output orc_null_in_string.orc orc_null_in_string.json
+```
diff --git a/automation/src/test/resources/data/orc/orc_null_in_string.json b/automation/src/test/resources/data/orc/orc_null_in_string.json
new file mode 100644
index 000000000..e918aad63
--- /dev/null
+++ b/automation/src/test/resources/data/orc/orc_null_in_string.json
@@ -0,0 +1,6 @@
+{"id": 1, "context": "simple string", "value": "hello"}
+{"id": 2, "context": "simple string with space", "value": "hello world"}
+{"id": 3, "context": "simple string with double quote", "value": "hello \"world\""}
+{"id": 4, "context": "NUL-byte in middle of string", "value": "hello\u0000world"}
+{"id": 5, "context": "NUL-byte at the beginning of string", "value": "\u0000hello world"}
+{"id": 6, "context": "NUL-byte at the end of string", "value": "hello world\u0000"}
diff --git a/automation/src/test/resources/data/orc/orc_null_in_string.orc b/automation/src/test/resources/data/orc/orc_null_in_string.orc
new file mode 100644
index 000000000..3cc1e6aed
Binary files /dev/null and b/automation/src/test/resources/data/orc/orc_null_in_string.orc differ
diff --git a/automation/tincrepo/main/pxf/features/orc/read/null_in_string/__init__.py b/automation/tincrepo/main/pxf/features/orc/read/null_in_string/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/automation/tincrepo/main/pxf/features/orc/read/null_in_string/expected/query01.ans b/automation/tincrepo/main/pxf/features/orc/read/null_in_string/expected/query01.ans
new file mode 100644
index 000000000..bbfd641ae
--- /dev/null
+++ b/automation/tincrepo/main/pxf/features/orc/read/null_in_string/expected/query01.ans
@@ -0,0 +1,13 @@
+-- start_ignore
+-- end_ignore
+-- @description query01 for reading strings contain NUL-byte from ORC files
+SELECT * FROM pxf_orc_null_in_string ORDER BY id;
+ id | context | value
+----+-------------------------------------+---------------
+ 1 | simple string | hello
+ 2 | simple string with space | hello world
+ 3 | simple string with double quote | hello "world"
+ 4 | NUL-byte in middle of string | hello
+ 5 | NUL-byte at the beginning of string |
+ 6 | NUL-byte at the end of string | hello world
+(6 rows)
\ No newline at end of file
diff --git a/automation/tincrepo/main/pxf/features/orc/read/null_in_string/runTest.py b/automation/tincrepo/main/pxf/features/orc/read/null_in_string/runTest.py
new file mode 100644
index 000000000..916deefe9
--- /dev/null
+++ b/automation/tincrepo/main/pxf/features/orc/read/null_in_string/runTest.py
@@ -0,0 +1,11 @@
+from mpp.models import SQLConcurrencyTestCase
+
+class OrcNullInString(SQLConcurrencyTestCase):
+ """
+ @db_name pxfautomation
+ @concurrency 1
+ @gpdiff True
+ """
+ sql_dir = 'sql'
+ ans_dir = 'expected'
+ out_dir = 'output'
diff --git a/automation/tincrepo/main/pxf/features/orc/read/null_in_string/sql/query01.sql b/automation/tincrepo/main/pxf/features/orc/read/null_in_string/sql/query01.sql
new file mode 100644
index 000000000..52e65cb34
--- /dev/null
+++ b/automation/tincrepo/main/pxf/features/orc/read/null_in_string/sql/query01.sql
@@ -0,0 +1,3 @@
+-- @description query01 for reading strings contain NUL-byte from ORC files
+
+SELECT * FROM pxf_orc_null_in_string ORDER BY id;
diff --git a/concourse/docker/diagram/images.dot b/concourse/docker/diagram/images.dot
index 70dc969fa..463e1e7a6 100644
--- a/concourse/docker/diagram/images.dot
+++ b/concourse/docker/diagram/images.dot
@@ -25,6 +25,7 @@ digraph pxf_container_image_flow {
style=dashed
node [shape=box3d fillcolor="#2aa198" style=filled fontcolor=white]
gp6_rhel8_latest[label="gpdb6-rhel8-test:latest"]
+ gp7_rhel8_latest[label="gpdb7-rhel8-test:latest"]
}
# PXF Cloudbuild & Dockerfiles
@@ -39,6 +40,7 @@ digraph pxf_container_image_flow {
gp6_ubuntu18_dockerfile[label="gpdb6/ubuntu18.04"]
gp6_oel7_dockerfile[label="gpdb6/oel7"]
gp7_centos7_dockerfile[label="gpdb7/centos7"]
+ gp7_rhel8_dockerfile[label="gpdb7/rhel8"]
gp7_ubuntu18_dockerfile[label="gpdb7/ubuntu18.04"]
}
@@ -69,6 +71,7 @@ digraph pxf_container_image_flow {
gp6_ubuntu18_pxf_sha[label="gpdb6-ubuntu18.04-test-pxf:$COMMIT_SHA"]
gp6_oel7_pxf_sha[label="gpdb6-oel7-test-pxf:$COMMIT_SHA"]
gp7_centos7_pxf_sha[label="gpdb7-centos7-test-pxf:$COMMIT_SHA"]
+ gp7_rhel8_pxf_sha[label="gpdb7-rhel8-test-pxf:$COMMIT_SHA"]
gp7_ubuntu18_pxf_sha[label="gpdb7-ubuntu18.04-test-pxf:$COMMIT_SHA"]
gp5_centos7_pxf_latest[label="gpdb5-centos7-test-pxf:latest" style=filled fillcolor="#6c71c4" fontcolor=white]
@@ -77,6 +80,7 @@ digraph pxf_container_image_flow {
gp6_ubuntu18_pxf_latest[label="gpdb6-ubuntu18.04-test-pxf:latest" style=filled fillcolor="#6c71c4" fontcolor=white]
gp6_oel7_pxf_latest[label="gpdb6-oel7-test-pxf:latest" style=filled fillcolor="#6c71c4" fontcolor=white]
gp7_centos7_pxf_latest[label="gpdb7-centos7-test-pxf:latest" style=filled fillcolor="#6c71c4" fontcolor=white]
+ gp7_rhel8_pxf_latest[label="gpdb7-rhel8-test-pxf:latest" style=filled fillcolor="#6c71c4" fontcolor=white]
gp7_ubuntu18_pxf_latest[label="gpdb7-ubuntu18.04-test-pxf:latest" style=filled fillcolor="#6c71c4" fontcolor=white]
}
@@ -159,6 +163,10 @@ digraph pxf_container_image_flow {
gp7_centos7_dockerfile -> gp7_centos7_pxf_sha[label="CloudBuild"]
gp7_centos7_pxf_sha -> gp7_centos7_pxf_latest[label="tag (concourse pipeline)"]
+ gp7_rhel8_latest -> gp7_rhel8_dockerfile
+ gp7_rhel8_dockerfile -> gp7_rhel8_pxf_sha[label="CloudBuild"]
+ gp7_rhel8_pxf_sha -> gp7_rhel8_pxf_latest[label="tag (concourse pipeline)"]
+
gp7_ubuntu18_latest -> gp7_ubuntu18_dockerfile
gp7_ubuntu18_dockerfile -> gp7_ubuntu18_pxf_sha[label="CloudBuild"]
gp7_ubuntu18_pxf_sha -> gp7_ubuntu18_pxf_latest[label="tag (concourse pipeline)"]
@@ -203,6 +211,10 @@ digraph pxf_container_image_flow {
gp7_centos7_pxf_latest -> build
gp7_centos7_pxf_latest -> pr
+ gp7_rhel8_pxf_latest -> certification
+ gp7_rhel8_pxf_latest -> build
+ gp7_rhel8_pxf_latest -> pr
+
gp7_ubuntu18_pxf_latest -> build
gp7_ubuntu18_pxf_latest -> pr
diff --git a/concourse/docker/diagram/images.svg b/concourse/docker/diagram/images.svg
index bceda5558..29746f802 100644
--- a/concourse/docker/diagram/images.svg
+++ b/concourse/docker/diagram/images.svg
@@ -1,742 +1,809 @@
-
-