Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,17 @@
# Changelog

## 6.4.2 (09/19/2022)

### Bug Fixes:

- [#870](https://github.com/greenplum-db/pxf/pull/870) Relax the requirement that C string length matches Java string length

## 6.4.1 (09/16/2022)

### Bug Fixes:

- [#858](https://github.com/greenplum-db/pxf/pull/858) Add JsonProtocolHandler to use HdfsFileFragmenter for multi-line JSON

## 6.4.0 (08/15/2022)

### Enhancements:
Expand Down
6 changes: 0 additions & 6 deletions automation/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -231,12 +231,6 @@
<version>1.10.11</version>
</dependency>

<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.14.2</version>
</dependency>

<!-- AWS Dependencies -->
<dependency>
<groupId>com.amazonaws</groupId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,8 @@ public void init() throws Exception {
public void addPathToPxfClassPath(String path) throws Exception {
String content = "export PXF_LOADER_PATH=file:" + path;
// path to local fetch pxf class file
File pathToLocalClassPathFile = new File(getPxfBase() + "/conf", getPxfConfigurationFile());

File pathToLocalClassPathFile = new File(getPathToLocalPxfConfDirectory(), getPxfConfigurationFile());
ReportUtils.report(report, getClass(), "Add " + content + " to PXF class path (" + pathToLocalClassPathFile.getAbsolutePath() + ")");
// read file content
String pxfClasspathContent = new String(Files.readAllBytes(Paths.get(pathToLocalClassPathFile.getAbsolutePath())));
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ public class OrcReadTest extends BaseFeature {
private static final String ORC_PRIMITIVE_TYPES_UNORDERED_SUBSET = "orc_types_unordered_subset.orc";
private static final String ORC_LIST_TYPES = "orc_list_types.orc";
private static final String ORC_MULTIDIM_LIST_TYPES = "orc_multidim_list_types.orc";
private static final String ORC_NULL_IN_STRING = "orc_null_in_string.orc";

private static final String[] ORC_TABLE_COLUMNS = {
"id integer",
Expand Down Expand Up @@ -73,6 +74,12 @@ public class OrcReadTest extends BaseFeature {
"varchar_arr text[]"
};

// Column definitions for the external table created over the ORC_NULL_IN_STRING file.
private static final String[] ORC_NULL_IN_STRING_COLUMNS = {
        "id int",
        "context text",
        "value text"
};

private String hdfsPath;
private ProtocolEnum protocol;

Expand All @@ -87,6 +94,7 @@ public void beforeClass() throws Exception {
hdfs.copyFromLocal(resourcePath + ORC_PRIMITIVE_TYPES_UNORDERED_SUBSET, hdfsPath + ORC_PRIMITIVE_TYPES_UNORDERED_SUBSET);
hdfs.copyFromLocal(resourcePath + ORC_LIST_TYPES, hdfsPath + ORC_LIST_TYPES);
hdfs.copyFromLocal(resourcePath + ORC_MULTIDIM_LIST_TYPES, hdfsPath + ORC_MULTIDIM_LIST_TYPES);
hdfs.copyFromLocal(resourcePath + ORC_NULL_IN_STRING, hdfsPath + ORC_NULL_IN_STRING);

prepareReadableExternalTable(PXF_ORC_TABLE, ORC_TABLE_COLUMNS, hdfsPath + ORC_PRIMITIVE_TYPES);
}
Expand Down Expand Up @@ -146,6 +154,12 @@ public void orcReadMultiDimensionalLists() throws Exception {
runTincTest("pxf.features.orc.read.multidim_list_types.runTest");
}

/**
 * Creates a readable external table over the {@code ORC_NULL_IN_STRING} file
 * and runs the matching Tinc test suite against it.
 *
 * @throws Exception if the table preparation or the Tinc test run fails
 */
@Test(groups = {"features", "gpdb", "security", "hcfs"})
public void orcReadStringsContainingNullByte() throws Exception {
    final String tableName = "pxf_orc_null_in_string";
    final String dataLocation = hdfsPath + ORC_NULL_IN_STRING;

    prepareReadableExternalTable(tableName, ORC_NULL_IN_STRING_COLUMNS, dataLocation);
    runTincTest("pxf.features.orc.read.null_in_string.runTest");
}

/**
 * Convenience overload that delegates to the four-argument
 * {@code prepareReadableExternalTable}, passing {@code false} as the last argument.
 * NOTE(review): the meaning of that flag is defined by the four-argument overload,
 * which is not visible here - confirm before relying on it.
 *
 * @param name   name of the external table
 * @param fields column definitions for the table
 * @param path   location of the data the table reads
 * @throws Exception if the delegated call fails
 */
private void prepareReadableExternalTable(String name, String[] fields, String path) throws Exception {
prepareReadableExternalTable(name, fields, path, false);
}
Expand Down
7 changes: 7 additions & 0 deletions automation/src/test/resources/data/orc/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -103,3 +103,10 @@ If desired, you can copy down the CSV file by running the following command:
mv "./orc_multidim_list_types/000000_0" "./${CSV_FILENAME}"
```

## Generate the orc_null_in_string.orc file

To generate this file, you will need to download a copy of the ORC Tools uber jar: <https://search.maven.org/remotecontent?filepath=org/apache/orc/orc-tools/1.8.0/orc-tools-1.8.0-uber.jar>

```shell
java -jar orc-tools-1.8.0-uber.jar convert --schema 'struct<id:int,context:string,value:string>' --output orc_null_in_string.orc orc_null_in_string.json
```
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{"id": 1, "context": "simple string", "value": "hello"}
{"id": 2, "context": "simple string with space", "value": "hello world"}
{"id": 3, "context": "simple string with double quote", "value": "hello \"world\""}
{"id": 4, "context": "NUL-byte in middle of string", "value": "hello\u0000world"}
{"id": 5, "context": "NUL-byte at the beginning of string", "value": "\u0000hello world"}
{"id": 6, "context": "NUL-byte at the end of string", "value": "hello world\u0000"}
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
-- start_ignore
-- end_ignore
-- @description query01 for reading strings contain NUL-byte from ORC files
SELECT * FROM pxf_orc_null_in_string ORDER BY id;
id | context | value
----+-------------------------------------+---------------
1 | simple string | hello
2 | simple string with space | hello world
3 | simple string with double quote | hello "world"
4 | NUL-byte in middle of string | hello
5 | NUL-byte at the beginning of string |
6 | NUL-byte at the end of string | hello world
(6 rows)
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
from mpp.models import SQLConcurrencyTestCase

# SQL-based test case; judging by the class name it covers ORC strings containing NUL bytes.
# NOTE(review): the @db_name / @concurrency / @gpdiff tags inside the docstring look like
# metadata consumed by the test framework - confirm before editing or reformatting them.
class OrcNullInString(SQLConcurrencyTestCase):
"""
@db_name pxfautomation
@concurrency 1
@gpdiff True
"""
# Directory names for the SQL inputs, the expected answers, and the generated output.
# Presumably resolved relative to this file - verify against SQLConcurrencyTestCase.
sql_dir = 'sql'
ans_dir = 'expected'
out_dir = 'output'
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
-- @description query01 for reading strings contain NUL-byte from ORC files

SELECT * FROM pxf_orc_null_in_string ORDER BY id;
12 changes: 12 additions & 0 deletions concourse/docker/diagram/images.dot
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ digraph pxf_container_image_flow {
style=dashed
node [shape=box3d fillcolor="#2aa198" style=filled fontcolor=white]
gp6_rhel8_latest[label="gpdb6-rhel8-test:latest"]
gp7_rhel8_latest[label="gpdb7-rhel8-test:latest"]
}

# PXF Cloudbuild & Dockerfiles
Expand All @@ -39,6 +40,7 @@ digraph pxf_container_image_flow {
gp6_ubuntu18_dockerfile[label="gpdb6/ubuntu18.04"]
gp6_oel7_dockerfile[label="gpdb6/oel7"]
gp7_centos7_dockerfile[label="gpdb7/centos7"]
gp7_rhel8_dockerfile[label="gpdb7/rhel8"]
gp7_ubuntu18_dockerfile[label="gpdb7/ubuntu18.04"]

}
Expand Down Expand Up @@ -69,6 +71,7 @@ digraph pxf_container_image_flow {
gp6_ubuntu18_pxf_sha[label="gpdb6-ubuntu18.04-test-pxf:$COMMIT_SHA"]
gp6_oel7_pxf_sha[label="gpdb6-oel7-test-pxf:$COMMIT_SHA"]
gp7_centos7_pxf_sha[label="gpdb7-centos7-test-pxf:$COMMIT_SHA"]
gp7_rhel8_pxf_sha[label="gpdb7-rhel8-test-pxf:$COMMIT_SHA"]
gp7_ubuntu18_pxf_sha[label="gpdb7-ubuntu18.04-test-pxf:$COMMIT_SHA"]

gp5_centos7_pxf_latest[label="gpdb5-centos7-test-pxf:latest" style=filled fillcolor="#6c71c4" fontcolor=white]
Expand All @@ -77,6 +80,7 @@ digraph pxf_container_image_flow {
gp6_ubuntu18_pxf_latest[label="gpdb6-ubuntu18.04-test-pxf:latest" style=filled fillcolor="#6c71c4" fontcolor=white]
gp6_oel7_pxf_latest[label="gpdb6-oel7-test-pxf:latest" style=filled fillcolor="#6c71c4" fontcolor=white]
gp7_centos7_pxf_latest[label="gpdb7-centos7-test-pxf:latest" style=filled fillcolor="#6c71c4" fontcolor=white]
gp7_rhel8_pxf_latest[label="gpdb7-rhel8-test-pxf:latest" style=filled fillcolor="#6c71c4" fontcolor=white]
gp7_ubuntu18_pxf_latest[label="gpdb7-ubuntu18.04-test-pxf:latest" style=filled fillcolor="#6c71c4" fontcolor=white]
}

Expand Down Expand Up @@ -159,6 +163,10 @@ digraph pxf_container_image_flow {
gp7_centos7_dockerfile -> gp7_centos7_pxf_sha[label="CloudBuild"]
gp7_centos7_pxf_sha -> gp7_centos7_pxf_latest[label="tag (concourse pipeline)"]

gp7_rhel8_latest -> gp7_rhel8_dockerfile
gp7_rhel8_dockerfile -> gp7_rhel8_pxf_sha[label="CloudBuild"]
gp7_rhel8_pxf_sha -> gp7_rhel8_pxf_latest[label="tag (concourse pipeline)"]

gp7_ubuntu18_latest -> gp7_ubuntu18_dockerfile
gp7_ubuntu18_dockerfile -> gp7_ubuntu18_pxf_sha[label="CloudBuild"]
gp7_ubuntu18_pxf_sha -> gp7_ubuntu18_pxf_latest[label="tag (concourse pipeline)"]
Expand Down Expand Up @@ -203,6 +211,10 @@ digraph pxf_container_image_flow {
gp7_centos7_pxf_latest -> build
gp7_centos7_pxf_latest -> pr

gp7_rhel8_pxf_latest -> certification
gp7_rhel8_pxf_latest -> build
gp7_rhel8_pxf_latest -> pr

gp7_ubuntu18_pxf_latest -> build
gp7_ubuntu18_pxf_latest -> pr

Expand Down
Loading