diff --git a/.gitignore b/.gitignore index 8b245acbf40f6d..c6cd57f5baf957 100644 --- a/.gitignore +++ b/.gitignore @@ -137,6 +137,9 @@ lru_cache_test docker/thirdparties/docker-compose/*/data docker/thirdparties/docker-compose/*/logs docker/thirdparties/docker-compose/*/*.yaml +docker/thirdparties/docker-compose/*/*.env +docker/thirdparties/docker-compose/*/cache/ +docker/thirdparties/docker-compose/*/scripts/SUCCESS docker/runtime/be/resource/apache-doris/ # other diff --git a/docker/thirdparties/docker-compose/hudi/README.md b/docker/thirdparties/docker-compose/hudi/README.md new file mode 100644 index 00000000000000..ffc2bb74494db6 --- /dev/null +++ b/docker/thirdparties/docker-compose/hudi/README.md @@ -0,0 +1,280 @@ + + +# Hudi Docker Environment + +This directory contains the Docker Compose configuration for setting up a Hudi test environment with Spark, Hive Metastore, MinIO (S3-compatible storage), and PostgreSQL. + +## Components + +- **Spark**: Apache Spark 3.5.7 for processing Hudi tables +- **Hive Metastore**: Starburst Hive Metastore for table metadata management +- **PostgreSQL**: Database backend for Hive Metastore +- **MinIO**: S3-compatible object storage for Hudi data files + +## Important Configuration Parameters + +### Container UID +- **Parameter**: `CONTAINER_UID` in `custom_settings.env` +- **Default**: `doris--` +- **Note**: Must be set to a unique value to avoid conflicts with other Docker environments +- **Example**: `CONTAINER_UID="doris--bender--"` + +### Port Configuration (`hudi.env.tpl`) +- `HIVE_METASTORE_PORT`: Port for Hive Metastore Thrift service (default: 19083) +- `MINIO_API_PORT`: MinIO S3 API port (default: 19100) +- `MINIO_CONSOLE_PORT`: MinIO web console port (default: 19101) +- `SPARK_UI_PORT`: Spark web UI port (default: 18080) + +### MinIO Credentials (`hudi.env.tpl`) +- `MINIO_ROOT_USER`: MinIO access key (default: `minio`) +- `MINIO_ROOT_PASSWORD`: MinIO secret key (default: `minio123`) +- `HUDI_BUCKET`: S3 bucket name 
for Hudi data (default: `datalake`) + +### Version Compatibility +⚠️ **Important**: Hadoop versions must match Spark's built-in Hadoop version +- **Spark Version**: 3.5.7 (uses Hadoop 3.3.4) - default build for Hudi 1.0.2 +- **Hadoop AWS Version**: 3.3.4 (matching Spark's Hadoop) +- **Hudi Bundle Version**: 1.0.2 Spark 3.5 bundle (default build, matches Spark 3.5.7, matches Doris's Hudi version to avoid versionCode compatibility issues) +- **AWS SDK v1 Version**: 1.12.262 (required for Hadoop 3.3.4 S3A support, 1.12.x series) +- **PostgreSQL JDBC Version**: 42.7.1 (compatible with Hive Metastore) +- **Hudi 1.0.x Compatibility**: Supports Spark 3.5.x (default), 3.4.x, and 3.3.x + +### JAR Dependencies (`hudi.env.tpl`) +All JAR file versions and URLs are configurable: +- `HUDI_BUNDLE_VERSION` / `HUDI_BUNDLE_URL`: Hudi Spark bundle +- `HADOOP_AWS_VERSION` / `HADOOP_AWS_URL`: Hadoop S3A filesystem support +- `AWS_SDK_BUNDLE_VERSION` / `AWS_SDK_BUNDLE_URL`: AWS Java SDK Bundle v1 (required for Hadoop 3.3.4 S3A support, 1.12.x series) + +**Note**: `hadoop-common` is already included in Spark's built-in Hadoop distribution, so it's not configured here. +- `POSTGRESQL_JDBC_VERSION` / `POSTGRESQL_JDBC_URL`: PostgreSQL JDBC driver + +## Starting the Environment + +```bash +# Start Hudi environment +./docker/thirdparties/run-thirdparties-docker.sh -c hudi + +# Stop Hudi environment +./docker/thirdparties/run-thirdparties-docker.sh -c hudi --stop +``` + +## Adding Data + +⚠️ **Important**: To ensure data consistency after Docker restarts, **only use SQL scripts** to add data. Data added through `spark-sql` interactive shell is temporary and will not persist after container restart. + +### Using SQL Scripts + +Add new SQL files in `scripts/create_preinstalled_scripts/hudi/` directory: +- Files are executed in alphabetical order (e.g., `01_config_and_database.sql`, `02_create_user_activity_log_tables.sql`, etc.) 
+- Use descriptive names with numeric prefixes to control execution order +- Use environment variable substitution: `${HIVE_METASTORE_URIS}` and `${HUDI_BUCKET}` +- **Data created through SQL scripts will persist after Docker restart** + +Example: Create `08_create_custom_table.sql`: +```sql +USE regression_hudi; + +CREATE TABLE IF NOT EXISTS my_hudi_table ( + id BIGINT, + name STRING, + created_at TIMESTAMP +) USING hudi +TBLPROPERTIES ( + type = 'cow', + primaryKey = 'id', + preCombineField = 'created_at', + hoodie.datasource.hive_sync.enable = 'true', + hoodie.datasource.hive_sync.metastore.uris = '${HIVE_METASTORE_URIS}', + hoodie.datasource.hive_sync.mode = 'hms' +) +LOCATION 's3a://${HUDI_BUCKET}/warehouse/regression_hudi/my_hudi_table'; + +INSERT INTO my_hudi_table VALUES + (1, 'Alice', TIMESTAMP '2024-01-01 10:00:00'), + (2, 'Bob', TIMESTAMP '2024-01-02 11:00:00'); +``` + +After adding SQL files, restart the container to execute them: +```bash +docker restart doris--hudi-spark +``` + +## Creating Hudi Catalog in Doris + +After starting the Hudi Docker environment, you can create a Hudi catalog in Doris to access Hudi tables: + +```sql +-- Create Hudi catalog +CREATE CATALOG IF NOT EXISTS hudi_catalog PROPERTIES ( + 'type' = 'hms', + 'hive.metastore.uris' = 'thrift://:19083', + 's3.endpoint' = 'http://:19100', + 's3.access_key' = 'minio', + 's3.secret_key' = 'minio123', + 's3.region' = 'us-east-1', + 'use_path_style' = 'true' +); + +-- Switch to Hudi catalog +SWITCH hudi_catalog; + +-- Use database +USE regression_hudi; + +-- Show tables +SHOW TABLES; + +-- Query Hudi table +SELECT * FROM user_activity_log_cow_partition LIMIT 10; +``` + +**Configuration Parameters:** +- `hive.metastore.uris`: Hive Metastore Thrift service address (default port: 19083) +- `s3.endpoint`: MinIO S3 API endpoint (default port: 19100) +- `s3.access_key`: MinIO access key (default: `minio`) +- `s3.secret_key`: MinIO secret key (default: `minio123`) +- `s3.region`: S3 region 
 (default: `us-east-1`) +- `use_path_style`: Use path-style access for MinIO (required: `true`) + +Replace the empty host portion in the addresses above (e.g., `thrift://:19083`) with your actual external environment IP address (e.g., `127.0.0.1` for localhost). + +## Debugging with Spark SQL + +⚠️ **Note**: The methods below are for debugging purposes only. Data created through `spark-sql` interactive shell will **not persist** after Docker restart. To add persistent data, use SQL scripts as described in the "Adding Data" section. + +### 1. Connect to Spark Container + +```bash +docker exec -it doris--hudi-spark bash +``` + +### 2. Start Spark SQL Interactive Shell + +```bash +/opt/spark/bin/spark-sql \ + --master local[*] \ + --name hudi-debug \ + --conf spark.serializer=org.apache.spark.serializer.KryoSerializer \ + --conf spark.sql.catalogImplementation=hive \ + --conf spark.sql.extensions=org.apache.spark.sql.hudi.HoodieSparkSessionExtension \ + --conf spark.sql.catalog.spark_catalog=org.apache.spark.sql.hudi.catalog.HoodieCatalog \ + --conf spark.sql.warehouse.dir=s3a://datalake/warehouse +``` + +### 3. Common Debugging Commands + +```sql +-- Show databases +SHOW DATABASES; + +-- Use database +USE regression_hudi; + +-- Show tables +SHOW TABLES; + +-- Describe table structure +DESCRIBE EXTENDED user_activity_log_cow_partition; + +-- Query data +SELECT * FROM user_activity_log_cow_partition LIMIT 10; + +-- Check Hudi table properties +SHOW TBLPROPERTIES user_activity_log_cow_partition; + +-- View Spark configuration +SET -v; + +-- Check Hudi-specific configurations +SET hoodie.datasource.write.hive_style_partitioning; +``` + +### 4. View Spark Web UI + +Access Spark Web UI at: `http://localhost:18080` (or configured `SPARK_UI_PORT`) + +### 5. Check Container Logs + +```bash +# View Spark container logs +docker logs doris--hudi-spark --tail 100 -f + +# View Hive Metastore logs +docker logs doris--hudi-metastore --tail 100 -f + +# View MinIO logs +docker logs doris--hudi-minio --tail 100 -f +``` + +### 6. 
Verify S3 Data + +```bash +# Access MinIO console +# URL: http://localhost:19101 (or configured MINIO_CONSOLE_PORT) +# Username: minio (or MINIO_ROOT_USER) +# Password: minio123 (or MINIO_ROOT_PASSWORD) + +# Or use MinIO client +docker exec -it doris--hudi-minio-mc mc ls myminio/datalake/warehouse/regression_hudi/ +``` + +## Troubleshooting + +### Container Exits Immediately +- Check logs: `docker logs doris--hudi-spark` +- Verify SUCCESS file exists: `docker exec doris--hudi-spark test -f /opt/hudi-scripts/SUCCESS` +- Ensure Hive Metastore is running: `docker ps | grep metastore` + +### ClassNotFoundException Errors +- Verify JAR files are downloaded: `docker exec doris--hudi-spark ls -lh /opt/hudi-cache/` +- Check JAR versions match Spark's Hadoop version (3.3.4) +- Review `hudi.env.tpl` for correct version numbers + +### S3A Connection Issues +- Verify MinIO is running: `docker ps | grep minio` +- Check MinIO credentials in `hudi.env.tpl` +- Test S3 connection: `docker exec doris--hudi-minio-mc mc ls myminio/` + +### Hive Metastore Connection Issues +- Check Metastore is ready: `docker logs doris--hudi-metastore | grep "Metastore is ready"` +- Verify PostgreSQL is running: `docker ps | grep metastore-db` +- Test connection: `docker exec doris--hudi-metastore-db pg_isready -U hive` + +## File Structure + +``` +hudi/ +├── hudi.yaml.tpl # Docker Compose template +├── hudi.env.tpl # Environment variables template +├── scripts/ +│ ├── init.sh # Initialization script +│ ├── create_preinstalled_scripts/ +│ │ └── hudi/ # SQL scripts (01_config_and_database.sql, 02_create_user_activity_log_tables.sql, ...) 
+│ └── SUCCESS # Initialization marker (generated) +└── cache/ # Downloaded JAR files (generated) +``` + +## Notes + +- All generated files (`.yaml`, `.env`, `cache/`, `SUCCESS`) are ignored by Git +- SQL scripts support environment variable substitution using `${VARIABLE_NAME}` syntax +- Hadoop version compatibility is critical - must match Spark's built-in version +- Container keeps running after initialization for healthcheck and debugging + diff --git a/docker/thirdparties/docker-compose/hudi/hadoop.env b/docker/thirdparties/docker-compose/hudi/hadoop.env deleted file mode 100644 index 28ef46c3eb2ae0..00000000000000 --- a/docker/thirdparties/docker-compose/hudi/hadoop.env +++ /dev/null @@ -1,52 +0,0 @@ - -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -HIVE_SITE_CONF_javax_jdo_option_ConnectionURL=jdbc:postgresql://hive-metastore-postgresql/metastore -HIVE_SITE_CONF_javax_jdo_option_ConnectionDriverName=org.postgresql.Driver -HIVE_SITE_CONF_javax_jdo_option_ConnectionUserName=hive -HIVE_SITE_CONF_javax_jdo_option_ConnectionPassword=hive -HIVE_SITE_CONF_datanucleus_autoCreateSchema=false -HIVE_SITE_CONF_hive_metastore_uris=thrift://hivemetastore:9083 - -HDFS_CONF_dfs_namenode_datanode_registration_ip___hostname___check=false -HDFS_CONF_dfs_webhdfs_enabled=true -HDFS_CONF_dfs_permissions_enabled=false -#HDFS_CONF_dfs_client_use_datanode_hostname=true -#HDFS_CONF_dfs_namenode_use_datanode_hostname=true -HDFS_CONF_dfs_replication=1 - -CORE_CONF_fs_defaultFS=hdfs://namenode:8020 -CORE_CONF_hadoop_http_staticuser_user=root -CORE_CONF_hadoop_proxyuser_hue_hosts=* -CORE_CONF_hadoop_proxyuser_hue_groups=* - -YARN_CONF_yarn_log___aggregation___enable=true -YARN_CONF_yarn_resourcemanager_recovery_enabled=true -YARN_CONF_yarn_resourcemanager_store_class=org.apache.hadoop.yarn.server.resourcemanager.recovery.FileSystemRMStateStore -YARN_CONF_yarn_resourcemanager_fs_state___store_uri=/rmstate -YARN_CONF_yarn_nodemanager_remote___app___log___dir=/app-logs -YARN_CONF_yarn_log_server_url=http://historyserver:8188/applicationhistory/logs/ -YARN_CONF_yarn_timeline___service_enabled=true -YARN_CONF_yarn_timeline___service_generic___application___history_enabled=true -YARN_CONF_yarn_resourcemanager_system___metrics___publisher_enabled=true -YARN_CONF_yarn_resourcemanager_hostname=resourcemanager -YARN_CONF_yarn_timeline___service_hostname=historyserver -YARN_CONF_yarn_resourcemanager_address=resourcemanager:8032 -YARN_CONF_yarn_resourcemanager_scheduler_address=resourcemanager:8030 -YARN_CONF_yarn_resourcemanager_resource___tracker_address=resourcemanager:8031 -YARN_CONF_yarn_nodemanager_vmem___check___enabled=false diff --git a/docker/thirdparties/docker-compose/hudi/hudi.env.tpl 
b/docker/thirdparties/docker-compose/hudi/hudi.env.tpl new file mode 100644 index 00000000000000..022909856d872e --- /dev/null +++ b/docker/thirdparties/docker-compose/hudi/hudi.env.tpl @@ -0,0 +1,50 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +CONTAINER_UID=doris-- +HUDI_NETWORK=${CONTAINER_UID}hudi-network + +# Ports exposed to host +HIVE_METASTORE_PORT=19083 +MINIO_API_PORT=19100 +MINIO_CONSOLE_PORT=19101 +SPARK_UI_PORT=18080 + +# MinIO credentials/buckets +MINIO_ROOT_USER=minio +MINIO_ROOT_PASSWORD=minio123 +HUDI_BUCKET=datalake + +# Hudi bundle +# Hudi 1.0.2 supports Spark 3.5.x (default), 3.4.x, and 3.3.x +# Using Spark 3.5 bundle to match Spark 3.5.7 image (default build) +HUDI_BUNDLE_VERSION=1.0.2 +HUDI_BUNDLE_URL=https://repo1.maven.org/maven2/org/apache/hudi/hudi-spark3.5-bundle_2.12/1.0.2/hudi-spark3.5-bundle_2.12-1.0.2.jar + +# Hadoop AWS S3A filesystem (required for S3A support) +# Note: Version must match Spark's built-in Hadoop version (3.3.4 for Spark 3.5.7) +HADOOP_AWS_VERSION=3.3.4 +HADOOP_AWS_URL=https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-aws/3.3.4/hadoop-aws-3.3.4.jar + +# AWS Java SDK Bundle v1 (required for Hadoop 3.3.4 S3A support) +# Note: Hadoop 3.3.x uses AWS SDK v1, version 1.12.x is 
recommended +AWS_SDK_BUNDLE_VERSION=1.12.262 +AWS_SDK_BUNDLE_URL=https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-bundle/1.12.262/aws-java-sdk-bundle-1.12.262.jar + +# PostgreSQL JDBC driver (required for Hive Metastore connection) +POSTGRESQL_JDBC_VERSION=42.7.1 +POSTGRESQL_JDBC_URL=https://repo1.maven.org/maven2/org/postgresql/postgresql/42.7.1/postgresql-42.7.1.jar diff --git a/docker/thirdparties/docker-compose/hudi/hudi.yaml.tpl b/docker/thirdparties/docker-compose/hudi/hudi.yaml.tpl index f15346a90ea20e..a08f47212ae6f6 100644 --- a/docker/thirdparties/docker-compose/hudi/hudi.yaml.tpl +++ b/docker/thirdparties/docker-compose/hudi/hudi.yaml.tpl @@ -1,270 +1,135 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -version: "3.3" networks: - doris--hudi: - ipam: - driver: default - config: - - subnet: 168.37.0.0/24 + ${HUDI_NETWORK}: + name: ${HUDI_NETWORK} services: - - namenode: - image: apachehudi/hudi-hadoop_2.8.4-namenode:latest - hostname: namenode - container_name: namenode + ${CONTAINER_UID}hudi-minio: + image: minio/minio:RELEASE.2025-01-20T14-49-07Z + container_name: ${CONTAINER_UID}hudi-minio + command: server /data --console-address ":${MINIO_CONSOLE_PORT}" environment: - - CLUSTER_NAME=hudi_hadoop284_hive232_spark244 + MINIO_ROOT_USER: ${MINIO_ROOT_USER} + MINIO_ROOT_PASSWORD: ${MINIO_ROOT_PASSWORD} ports: - - "50070:50070" - - "8020:8020" - # JVM debugging port (will be mapped to a random port on host) - - "5005" - env_file: - - ./hadoop.env - healthcheck: - test: ["CMD", "curl", "-f", "http://namenode:50070"] - interval: 30s - timeout: 10s - retries: 3 + - "${MINIO_API_PORT}:9000" + - "${MINIO_CONSOLE_PORT}:9001" networks: - - doris--hudi + - ${HUDI_NETWORK} - datanode1: - image: apachehudi/hudi-hadoop_2.8.4-datanode:latest - container_name: datanode1 - hostname: datanode1 - environment: - - CLUSTER_NAME=hudi_hadoop284_hive232_spark244 - env_file: - - ./hadoop.env - ports: - - "50075:50075" - - "50010:50010" - # JVM debugging port (will be mapped to a random port on host) - - "5005" - links: - - "namenode" - - "historyserver" - healthcheck: - test: ["CMD", "curl", "-f", "http://datanode1:50075"] - interval: 30s - timeout: 10s - retries: 3 + ${CONTAINER_UID}hudi-minio-mc: + image: minio/mc:RELEASE.2025-01-17T23-25-50Z + container_name: ${CONTAINER_UID}hudi-minio-mc + entrypoint: | + /bin/bash -c " + set -euo pipefail + sleep 5 + mc alias 
set myminio http://${CONTAINER_UID}hudi-minio:9000 ${MINIO_ROOT_USER} ${MINIO_ROOT_PASSWORD} + mc mb --quiet myminio/${HUDI_BUCKET} || true + mc mb --quiet myminio/${HUDI_BUCKET}-tmp || true + " depends_on: - - namenode + - ${CONTAINER_UID}hudi-minio networks: - - doris--hudi + - ${HUDI_NETWORK} - historyserver: - image: apachehudi/hudi-hadoop_2.8.4-history:latest - hostname: historyserver - container_name: historyserver + ${CONTAINER_UID}hudi-metastore-db: + image: postgres:14 + container_name: ${CONTAINER_UID}hudi-metastore-db environment: - - CLUSTER_NAME=hudi_hadoop284_hive232_spark244 - depends_on: - - "namenode" - links: - - "namenode" - ports: - - "58188:8188" - healthcheck: - test: ["CMD", "curl", "-f", "http://historyserver:8188"] - interval: 30s - timeout: 10s - retries: 3 - env_file: - - ./hadoop.env - volumes: - - ./historyserver:/hadoop/yarn/timeline - networks: - - doris--hudi - - hive-metastore-postgresql: - image: bde2020/hive-metastore-postgresql:2.3.0 - volumes: - - ./hive-metastore-postgresql:/var/lib/postgresql - hostname: hive-metastore-postgresql - container_name: hive-metastore-postgresql + POSTGRES_USER: hive + POSTGRES_PASSWORD: hive + POSTGRES_DB: metastore networks: - - doris--hudi + - ${HUDI_NETWORK} - hivemetastore: - image: apachehudi/hudi-hadoop_2.8.4-hive_2.3.3:latest - hostname: hivemetastore - container_name: hivemetastore - links: - - "hive-metastore-postgresql" - - "namenode" - env_file: - - ./hadoop.env - command: /opt/hive/bin/hive --service metastore + ${CONTAINER_UID}hudi-metastore: + image: starburstdata/hive:3.1.2-e.18 + container_name: ${CONTAINER_UID}hudi-metastore + hostname: ${CONTAINER_UID}hudi-metastore environment: - SERVICE_PRECONDITION: "namenode:50070 hive-metastore-postgresql:5432" - ports: - - "9083:9083" - # JVM debugging port (will be mapped to a random port on host) - - "5005" - healthcheck: - test: ["CMD", "nc", "-z", "hivemetastore", "9083"] - interval: 30s - timeout: 10s - retries: 3 + 
HIVE_METASTORE_DRIVER: org.postgresql.Driver + HIVE_METASTORE_JDBC_URL: jdbc:postgresql://${CONTAINER_UID}hudi-metastore-db:5432/metastore + HIVE_METASTORE_USER: hive + HIVE_METASTORE_PASSWORD: hive + HIVE_METASTORE_WAREHOUSE_DIR: s3a://${HUDI_BUCKET}/warehouse + S3_ENDPOINT: http://${CONTAINER_UID}hudi-minio:9000 + S3_ACCESS_KEY: ${MINIO_ROOT_USER} + S3_SECRET_KEY: ${MINIO_ROOT_PASSWORD} + S3_PATH_STYLE_ACCESS: "true" + REGION: "us-east-1" + GOOGLE_CLOUD_KEY_FILE_PATH: "" + AZURE_ADL_CLIENT_ID: "" + AZURE_ADL_CREDENTIAL: "" + AZURE_ADL_REFRESH_URL: "" + AZURE_ABFS_STORAGE_ACCOUNT: "" + AZURE_ABFS_ACCESS_KEY: "" + AZURE_WASB_STORAGE_ACCOUNT: "" + AZURE_ABFS_OAUTH: "" + AZURE_ABFS_OAUTH_TOKEN_PROVIDER: "" + AZURE_ABFS_OAUTH_CLIENT_ID: "" + AZURE_ABFS_OAUTH_SECRET: "" + AZURE_ABFS_OAUTH_ENDPOINT: "" + AZURE_WASB_ACCESS_KEY: "" + HIVE_METASTORE_USERS_IN_ADMIN_ROLE: "hive" depends_on: - - "hive-metastore-postgresql" - - "namenode" - networks: - - doris--hudi - - hiveserver: - image: apachehudi/hudi-hadoop_2.8.4-hive_2.3.3:latest - hostname: hiveserver - container_name: hiveserver - env_file: - - ./hadoop.env - environment: - SERVICE_PRECONDITION: "hivemetastore:9083" + - ${CONTAINER_UID}hudi-metastore-db + - ${CONTAINER_UID}hudi-minio ports: - - "10000:10000" - # JVM debugging port (will be mapped to a random port on host) - - "5005" - depends_on: - - "hivemetastore" - links: - - "hivemetastore" - - "hive-metastore-postgresql" - - "namenode" - volumes: - - ./scripts:/var/scripts - networks: - - doris--hudi - - sparkmaster: - image: apachehudi/hudi-hadoop_2.8.4-hive_2.3.3-sparkmaster_2.4.4:latest - hostname: sparkmaster - container_name: sparkmaster - env_file: - - ./hadoop.env - ports: - - "8080:8080" - - "7077:7077" - # JVM debugging port (will be mapped to a random port on host) - - "5005" - environment: - - INIT_DAEMON_STEP=setup_spark - links: - - "hivemetastore" - - "hiveserver" - - "hive-metastore-postgresql" - - "namenode" + - "${HIVE_METASTORE_PORT}:9083" 
networks: - - doris--hudi + - ${HUDI_NETWORK} - spark-worker-1: - image: apachehudi/hudi-hadoop_2.8.4-hive_2.3.3-sparkworker_2.4.4:latest - hostname: spark-worker-1 - container_name: spark-worker-1 - env_file: - - ./hadoop.env - depends_on: - - sparkmaster - ports: - - "8081:8081" - # JVM debugging port (will be mapped to a random port on host) - - "5005" + ${CONTAINER_UID}hudi-spark: + image: spark:3.5.7-scala2.12-java17-ubuntu + container_name: ${CONTAINER_UID}hudi-spark + hostname: ${CONTAINER_UID}hudi-spark + user: root environment: - - "SPARK_MASTER=spark://sparkmaster:7077" - links: - - "hivemetastore" - - "hiveserver" - - "hive-metastore-postgresql" - - "namenode" - networks: - - doris--hudi - -# zookeeper: -# image: 'bitnami/zookeeper:3.4.12-r68' -# hostname: zookeeper -# container_name: zookeeper -# ports: -# - "2181:2181" -# environment: -# - ALLOW_ANONYMOUS_LOGIN=yes -# networks: -# - doris--hudi - -# kafka: -# image: 'bitnami/kafka:2.0.0' -# hostname: kafkabroker -# container_name: kafkabroker -# ports: -# - "9092:9092" -# environment: -# - KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181 -# - ALLOW_PLAINTEXT_LISTENER=yes -# networks: -# - doris--hudi - - adhoc-1: - image: apachehudi/hudi-hadoop_2.8.4-hive_2.3.3-sparkadhoc_2.4.4:latest - hostname: adhoc-1 - container_name: adhoc-1 - env_file: - - ./hadoop.env - depends_on: - - sparkmaster - ports: - - '4040:4040' - # JVM debugging port (mapped to 5006 on the host) - - "5006:5005" - environment: - - "SPARK_MASTER=spark://sparkmaster:7077" - links: - - "hivemetastore" - - "hiveserver" - - "hive-metastore-postgresql" - - "namenode" + HUDI_BUNDLE_VERSION: ${HUDI_BUNDLE_VERSION} + HUDI_BUNDLE_URL: ${HUDI_BUNDLE_URL} + HADOOP_AWS_VERSION: ${HADOOP_AWS_VERSION} + HADOOP_AWS_URL: ${HADOOP_AWS_URL} + AWS_SDK_BUNDLE_VERSION: ${AWS_SDK_BUNDLE_VERSION} + AWS_SDK_BUNDLE_URL: ${AWS_SDK_BUNDLE_URL} + POSTGRESQL_JDBC_VERSION: ${POSTGRESQL_JDBC_VERSION} + POSTGRESQL_JDBC_URL: ${POSTGRESQL_JDBC_URL} + MINIO_ROOT_USER: 
${MINIO_ROOT_USER} + MINIO_ROOT_PASSWORD: ${MINIO_ROOT_PASSWORD} + HUDI_BUCKET: ${HUDI_BUCKET} + HIVE_METASTORE_URIS: thrift://${CONTAINER_UID}hudi-metastore:9083 + S3_ENDPOINT: http://${CONTAINER_UID}hudi-minio:9000 volumes: - - ./scripts:/var/scripts - networks: - - doris--hudi - - adhoc-2: - image: apachehudi/hudi-hadoop_2.8.4-hive_2.3.3-sparkadhoc_2.4.4:latest - hostname: adhoc-2 - container_name: adhoc-2 - env_file: - - ./hadoop.env - ports: - # JVM debugging port (mapped to 5005 on the host) - - "5005:5005" + - ./scripts:/opt/hudi-scripts + - ./cache:/opt/hudi-cache depends_on: - - sparkmaster - environment: - - "SPARK_MASTER=spark://sparkmaster:7077" - links: - - "hivemetastore" - - "hiveserver" - - "hive-metastore-postgresql" - - "namenode" - volumes: - - ./scripts:/var/scripts + - ${CONTAINER_UID}hudi-minio + - ${CONTAINER_UID}hudi-minio-mc + - ${CONTAINER_UID}hudi-metastore + command: ["/opt/hudi-scripts/init.sh"] + ports: + - "${SPARK_UI_PORT}:8080" + healthcheck: + test: ["CMD", "test", "-f", "/opt/hudi-scripts/SUCCESS"] + interval: 5s + timeout: 10s + retries: 120 + start_period: 30s networks: - - doris--hudi + - ${HUDI_NETWORK} diff --git a/docker/thirdparties/docker-compose/hudi/scripts/config/base.properties b/docker/thirdparties/docker-compose/hudi/scripts/config/base.properties deleted file mode 100644 index 0666245758e11a..00000000000000 --- a/docker/thirdparties/docker-compose/hudi/scripts/config/base.properties +++ /dev/null @@ -1,25 +0,0 @@ - -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -hoodie.upsert.shuffle.parallelism=2 -hoodie.insert.shuffle.parallelism=2 -hoodie.delete.shuffle.parallelism=2 -hoodie.bulkinsert.shuffle.parallelism=2 -hoodie.embed.timeline.server=true -hoodie.filesystem.view.type=EMBEDDED_KV_STORE -hoodie.compact.inline=false diff --git a/docker/thirdparties/docker-compose/hudi/scripts/config/dfs-source.properties b/docker/thirdparties/docker-compose/hudi/scripts/config/dfs-source.properties deleted file mode 100644 index 04c16e272a5701..00000000000000 --- a/docker/thirdparties/docker-compose/hudi/scripts/config/dfs-source.properties +++ /dev/null @@ -1,31 +0,0 @@ - -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -include=base.properties -# Key fields, for kafka example -hoodie.datasource.write.recordkey.field=key -hoodie.datasource.write.partitionpath.field=date -# NOTE: We have to duplicate configuration since this is being used -# w/ both Spark and DeltaStreamer -hoodie.table.recordkey.fields=key -hoodie.table.partition.fields=date -# Schema provider props (change to absolute path based on your installation) -hoodie.deltastreamer.schemaprovider.source.schema.file=/var/demo/config/schema.avsc -hoodie.deltastreamer.schemaprovider.target.schema.file=/var/demo/config/schema.avsc -# DFS Source -hoodie.deltastreamer.source.dfs.root=/usr/hive/data/input/ diff --git a/docker/thirdparties/docker-compose/hudi/scripts/config/hoodie-incr.properties b/docker/thirdparties/docker-compose/hudi/scripts/config/hoodie-incr.properties deleted file mode 100644 index c796063ff1a9dc..00000000000000 --- a/docker/thirdparties/docker-compose/hudi/scripts/config/hoodie-incr.properties +++ /dev/null @@ -1,34 +0,0 @@ - -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -hoodie.upsert.shuffle.parallelism=2 -hoodie.insert.shuffle.parallelism=2 -hoodie.delete.shuffle.parallelism=2 -hoodie.bulkinsert.shuffle.parallelism=2 -hoodie.datasource.write.recordkey.field=_row_key -hoodie.datasource.write.partitionpath.field=partition -hoodie.deltastreamer.schemaprovider.source.schema.file=file:///var/hoodie/ws/docker/demo/config/hoodie-schema.avsc -hoodie.deltastreamer.schemaprovider.target.schema.file=file:///var/hoodie/ws/docker/demo/config/hoodie-schema.avsc -hoodie.deltastreamer.source.hoodieincr.partition.fields=partition -hoodie.deltastreamer.source.hoodieincr.path=/docker_hoodie_sync_valid_test -hoodie.deltastreamer.source.hoodieincr.read_latest_on_missing_ckpt=true -# hive sync -hoodie.datasource.hive_sync.table=docker_hoodie_sync_valid_test_2 -hoodie.datasource.hive_sync.jdbcurl=jdbc:hive2://hiveserver:10000/ -hoodie.datasource.hive_sync.partition_fields=partition -hoodie.datasource.hive_sync.partition_extractor_class=org.apache.hudi.hive.SlashEncodedDayPartitionValueExtractor diff --git a/docker/thirdparties/docker-compose/hudi/scripts/config/hoodie-schema.avsc b/docker/thirdparties/docker-compose/hudi/scripts/config/hoodie-schema.avsc deleted file mode 100644 index f97742c947c7f6..00000000000000 --- a/docker/thirdparties/docker-compose/hudi/scripts/config/hoodie-schema.avsc +++ /dev/null @@ -1,146 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -{ - "type": "record", - "name": "triprec", - "fields": [ - { - "name": "timestamp", - "type": "double" - }, - { - "name": "_row_key", - "type": "string" - }, - { - "name": "rider", - "type": "string" - }, - { - "name": "driver", - "type": "string" - }, - { - "name": "begin_lat", - "type": "double" - }, - { - "name": "begin_lon", - "type": "double" - }, - { - "name": "end_lat", - "type": "double" - }, - { - "name": "end_lon", - "type": "double" - }, - { - "name": "distance_in_meters", - "type": "int" - }, - { - "name": "seconds_since_epoch", - "type": "long" - }, - { - "name": "weight", - "type": "float" - }, - { - "name": "nation", - "type": "bytes" - }, - { - "name": "current_date", - "type": { - "type": "int", - "logicalType": "date" - } - }, - { - "name": "current_ts", - "type": { - "type": "long", - "logicalType": "timestamp-micros" - } - }, - { - "name": "height", - "type": { - "type": "fixed", - "name": "abc", - "size": 5, - "logicalType": "decimal", - "precision": 10, - "scale": 6 - } - }, - { - "name": "city_to_state", - "type": { - "type": "map", - "values": "string" - } - }, - { - "name": "fare", - "type": { - "type": "record", - "name": "fare", - "fields": [ - { - "name": "amount", - "type": "double" - }, - { - "name": "currency", - "type": "string" - } - ] - } - }, - { - "name": "tip_history", - "type": { - "type": "array", - "items": { - "type": "record", - "name": "tip_history", - "fields": [ - { - "name": "amount", - "type": "double" - }, - { - "name": "currency", - "type": "string" - } - ] - } - } - }, - { - "name": 
"_hoodie_is_deleted", - "type": "boolean", - "default": false - } - ] -} diff --git a/docker/thirdparties/docker-compose/hudi/scripts/config/kafka-source.properties b/docker/thirdparties/docker-compose/hudi/scripts/config/kafka-source.properties deleted file mode 100644 index 5ba5290ca692a5..00000000000000 --- a/docker/thirdparties/docker-compose/hudi/scripts/config/kafka-source.properties +++ /dev/null @@ -1,30 +0,0 @@ - -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -include=base.properties -# Key fields, for kafka example -hoodie.datasource.write.recordkey.field=key -hoodie.datasource.write.partitionpath.field=date -# Schema provider props (change to absolute path based on your installation) -hoodie.deltastreamer.schemaprovider.source.schema.file=/var/demo/config/schema.avsc -hoodie.deltastreamer.schemaprovider.target.schema.file=/var/demo/config/schema.avsc -# Kafka Source -hoodie.deltastreamer.source.kafka.topic=stock_ticks -#Kafka props -bootstrap.servers=kafkabroker:9092 -auto.offset.reset=earliest diff --git a/docker/thirdparties/docker-compose/hudi/scripts/config/log4j2.properties b/docker/thirdparties/docker-compose/hudi/scripts/config/log4j2.properties deleted file mode 100644 index 86450ead3eea8f..00000000000000 --- a/docker/thirdparties/docker-compose/hudi/scripts/config/log4j2.properties +++ /dev/null @@ -1,61 +0,0 @@ -### -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-### -status = warn -name = HudiConsoleLog - -# Set everything to be logged to the console -appender.console.type = Console -appender.console.name = CONSOLE -appender.console.layout.type = PatternLayout -appender.console.layout.pattern = %d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n - -# Root logger level -rootLogger.level = warn -# Root logger referring to console appender -rootLogger.appenderRef.stdout.ref = CONSOLE - -# Set the default spark-shell log level to WARN. When running the spark-shell, the -# log level for this class is used to overwrite the root logger's log level, so that -# the user can have different defaults for the shell and regular Spark apps. -logger.apache_spark_repl.name = org.apache.spark.repl.Main -logger.apache_spark_repl.level = warn -# Set logging of integration testsuite to INFO level -logger.hudi_integ.name = org.apache.hudi.integ.testsuite -logger.hudi_integ.level = info -# Settings to quiet third party logs that are too verbose -logger.apache_spark_jetty.name = org.spark_project.jetty -logger.apache_spark_jetty.level = warn -logger.apache_spark_jett_lifecycle.name = org.spark_project.jetty.util.component.AbstractLifeCycle -logger.apache_spark_jett_lifecycle.level = error -logger.apache_spark_repl_imain.name = org.apache.spark.repl.SparkIMain$exprTyper -logger.apache_spark_repl_imain.level = info -logger.apache_spark_repl_iloop.name = org.apache.spark.repl.SparkILoop$SparkILoopInterpreter -logger.apache_spark_repl_iloop.level = info -logger.parquet.name = org.apache.parquet -logger.parquet.level = error -logger.spark.name = org.apache.spark -logger.spark.level = warn -# Disabling Jetty logs -logger.jetty.name = org.apache.hudi.org.eclipse.jetty -logger.jetty.level = error -# SPARK-9183: Settings to avoid annoying messages when looking up nonexistent UDFs in SparkSQL with Hive support -logger.hive_handler.name = org.apache.hadoop.hive.metastore.RetryingHMSHandler -logger.hive_handler.level = fatal -logger.hive_func_registry.name = 
org.apache.hadoop.hive.ql.exec.FunctionRegistry -logger.hive_func_registry.level = error diff --git a/docker/thirdparties/docker-compose/hudi/scripts/config/schema.avsc b/docker/thirdparties/docker-compose/hudi/scripts/config/schema.avsc deleted file mode 100644 index aa8baaf44b4f48..00000000000000 --- a/docker/thirdparties/docker-compose/hudi/scripts/config/schema.avsc +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -{ - "type":"record", - "name":"stock_ticks", - "fields":[{ - "name": "volume", - "type": "long" - }, { - "name": "ts", - "type": "string" - }, { - "name": "symbol", - "type": "string" - },{ - "name": "year", - "type": "int" - },{ - "name": "month", - "type": "string" - },{ - "name": "high", - "type": "double" - },{ - "name": "low", - "type": "double" - },{ - "name": "key", - "type": "string" - },{ - "name": "date", - "type":"string" - }, { - "name": "close", - "type": "double" - }, { - "name": "open", - "type": "double" - }, { - "name": "day", - "type":"string" - } -]} diff --git a/docker/thirdparties/docker-compose/hudi/scripts/config/spark-defaults.conf b/docker/thirdparties/docker-compose/hudi/scripts/config/spark-defaults.conf deleted file mode 100644 index d085bfe5889288..00000000000000 --- a/docker/thirdparties/docker-compose/hudi/scripts/config/spark-defaults.conf +++ /dev/null @@ -1,30 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# - -# Default system properties included when running spark-submit. -# This is useful for setting default environmental settings. 
- -# Example: -spark.master local[3] -spark.eventLog.dir hdfs://namenode:8020/tmp/spark-events -spark.serializer org.apache.spark.serializer.KryoSerializer -spark.kryo.registrator org.apache.spark.HoodieSparkKryoRegistrar - -#spark.executor.memory 4g -# spark.executor.extraJavaOptions -XX:+PrintGCDetails -Dkey=value -Dnumbers="one two three" diff --git a/docker/thirdparties/docker-compose/hudi/scripts/create_preinstalled_scripts/hudi/01_config_and_database.sql b/docker/thirdparties/docker-compose/hudi/scripts/create_preinstalled_scripts/hudi/01_config_and_database.sql new file mode 100644 index 00000000000000..dff11cf95f43ec --- /dev/null +++ b/docker/thirdparties/docker-compose/hudi/scripts/create_preinstalled_scripts/hudi/01_config_and_database.sql @@ -0,0 +1,11 @@ +-- Hudi configuration and database setup +SET hoodie.datasource.write.hive_style_partitioning = true; +SET hoodie.upsert.shuffle.parallelism = 2; +SET hoodie.insert.shuffle.parallelism = 2; + +-- set time zone to UTC to ensure consistent timestamp handling across different machines +SET TIME ZONE 'UTC'; + +CREATE DATABASE IF NOT EXISTS regression_hudi; +USE regression_hudi; + diff --git a/docker/thirdparties/docker-compose/hudi/scripts/create_preinstalled_scripts/hudi/02_create_user_activity_log_tables.sql b/docker/thirdparties/docker-compose/hudi/scripts/create_preinstalled_scripts/hudi/02_create_user_activity_log_tables.sql new file mode 100644 index 00000000000000..7eeab3036c3911 --- /dev/null +++ b/docker/thirdparties/docker-compose/hudi/scripts/create_preinstalled_scripts/hudi/02_create_user_activity_log_tables.sql @@ -0,0 +1,160 @@ +-- Create user_activity_log tables (COW/MOR, partitioned/non-partitioned) and insert demo data +USE regression_hudi; + +-- Drop existing tables if they exist +DROP TABLE IF EXISTS user_activity_log_cow_partition; +DROP TABLE IF EXISTS user_activity_log_cow_non_partition; +DROP TABLE IF EXISTS user_activity_log_mor_partition; +DROP TABLE IF EXISTS 
user_activity_log_mor_non_partition; + +-- Create COW partitioned table +CREATE TABLE IF NOT EXISTS user_activity_log_cow_partition ( + user_id BIGINT, + event_time BIGINT, + action STRING, + dt STRING +) USING hudi +TBLPROPERTIES ( + type = 'cow', + primaryKey = 'user_id', + preCombineField = 'event_time', + hoodie.metadata.enable = 'false', + hoodie.datasource.hive_sync.enable = 'true', + hoodie.datasource.hive_sync.metastore.uris = '${HIVE_METASTORE_URIS}', + hoodie.datasource.hive_sync.mode = 'hms', + hoodie.datasource.hive_sync.support_timestamp = 'true' +) +PARTITIONED BY (dt) +LOCATION 's3a://${HUDI_BUCKET}/warehouse/regression_hudi/user_activity_log_cow_partition'; + +-- Create COW non-partitioned table +CREATE TABLE IF NOT EXISTS user_activity_log_cow_non_partition ( + user_id BIGINT, + event_time BIGINT, + action STRING +) USING hudi +TBLPROPERTIES ( + type = 'cow', + primaryKey = 'user_id', + preCombineField = 'event_time', + hoodie.metadata.enable = 'false', + hoodie.datasource.hive_sync.enable = 'true', + hoodie.datasource.hive_sync.metastore.uris = '${HIVE_METASTORE_URIS}', + hoodie.datasource.hive_sync.mode = 'hms', + hoodie.datasource.hive_sync.support_timestamp = 'true' +) +LOCATION 's3a://${HUDI_BUCKET}/warehouse/regression_hudi/user_activity_log_cow_non_partition'; + +-- Create MOR partitioned table +CREATE TABLE IF NOT EXISTS user_activity_log_mor_partition ( + user_id BIGINT, + event_time BIGINT, + action STRING, + dt STRING +) USING hudi +TBLPROPERTIES ( + type = 'mor', + primaryKey = 'user_id', + preCombineField = 'event_time', + hoodie.compact.inline = 'true', + hoodie.compact.inline.max.delta.commits = '1', + hoodie.metadata.enable = 'false', + hoodie.datasource.hive_sync.enable = 'true', + hoodie.datasource.hive_sync.metastore.uris = '${HIVE_METASTORE_URIS}', + hoodie.datasource.hive_sync.mode = 'hms', + hoodie.datasource.hive_sync.support_timestamp = 'true' +) +PARTITIONED BY (dt) +LOCATION 
's3a://${HUDI_BUCKET}/warehouse/regression_hudi/user_activity_log_mor_partition'; + +-- Create MOR non-partitioned table +CREATE TABLE IF NOT EXISTS user_activity_log_mor_non_partition ( + user_id BIGINT, + event_time BIGINT, + action STRING +) USING hudi +TBLPROPERTIES ( + type = 'mor', + primaryKey = 'user_id', + preCombineField = 'event_time', + hoodie.compact.inline = 'true', + hoodie.compact.inline.max.delta.commits = '1', + hoodie.metadata.enable = 'false', + hoodie.datasource.hive_sync.enable = 'true', + hoodie.datasource.hive_sync.metastore.uris = '${HIVE_METASTORE_URIS}', + hoodie.datasource.hive_sync.mode = 'hms', + hoodie.datasource.hive_sync.support_timestamp = 'true' +) +LOCATION 's3a://${HUDI_BUCKET}/warehouse/regression_hudi/user_activity_log_mor_non_partition'; + +-- Insert demo data into tables with multiple small inserts for timetravel testing +-- Each INSERT creates a new commit, allowing us to test timetravel queries + +-- Insert data into user_activity_log_cow_partition (5 commits, 2 rows each) +INSERT INTO user_activity_log_cow_partition VALUES + (1, 1710000000000, 'login', '2024-03-01'), + (2, 1710000001000, 'click', '2024-03-01'); +INSERT INTO user_activity_log_cow_partition VALUES + (3, 1710000002000, 'logout', '2024-03-02'), + (4, 1710000003000, 'view', '2024-03-01'); +INSERT INTO user_activity_log_cow_partition VALUES + (5, 1710000004000, 'purchase', '2024-03-02'), + (6, 1710000005000, 'search', '2024-03-01'); +INSERT INTO user_activity_log_cow_partition VALUES + (7, 1710000006000, 'add_to_cart', '2024-03-02'), + (8, 1710000007000, 'remove_from_cart', '2024-03-01'); +INSERT INTO user_activity_log_cow_partition VALUES + (9, 1710000008000, 'share', '2024-03-02'), + (10, 1710000009000, 'comment', '2024-03-01'); + +-- Insert data into user_activity_log_cow_non_partition (5 commits, 2 rows each) +INSERT INTO user_activity_log_cow_non_partition VALUES + (1, 1710000000000, 'login'), + (2, 1710000001000, 'click'); +INSERT INTO 
user_activity_log_cow_non_partition VALUES + (3, 1710000002000, 'logout'), + (4, 1710000003000, 'view'); +INSERT INTO user_activity_log_cow_non_partition VALUES + (5, 1710000004000, 'purchase'), + (6, 1710000005000, 'search'); +INSERT INTO user_activity_log_cow_non_partition VALUES + (7, 1710000006000, 'add_to_cart'), + (8, 1710000007000, 'remove_from_cart'); +INSERT INTO user_activity_log_cow_non_partition VALUES + (9, 1710000008000, 'share'), + (10, 1710000009000, 'comment'); + +-- Insert data into user_activity_log_mor_partition (5 commits, 2 rows each) +INSERT INTO user_activity_log_mor_partition VALUES + (1, 1710000000000, 'login', '2024-03-01'), + (2, 1710000001000, 'click', '2024-03-01'); +INSERT INTO user_activity_log_mor_partition VALUES + (3, 1710000002000, 'logout', '2024-03-02'), + (4, 1710000003000, 'view', '2024-03-01'); +INSERT INTO user_activity_log_mor_partition VALUES + (5, 1710000004000, 'purchase', '2024-03-02'), + (6, 1710000005000, 'search', '2024-03-01'); +INSERT INTO user_activity_log_mor_partition VALUES + (7, 1710000006000, 'add_to_cart', '2024-03-02'), + (8, 1710000007000, 'remove_from_cart', '2024-03-01'); +INSERT INTO user_activity_log_mor_partition VALUES + (9, 1710000008000, 'share', '2024-03-02'), + (10, 1710000009000, 'comment', '2024-03-01'); + +-- Insert data into user_activity_log_mor_non_partition (5 commits, 2 rows each) +INSERT INTO user_activity_log_mor_non_partition VALUES + (1, 1710000000000, 'login'), + (2, 1710000001000, 'click'); +INSERT INTO user_activity_log_mor_non_partition VALUES + (3, 1710000002000, 'logout'), + (4, 1710000003000, 'view'); +INSERT INTO user_activity_log_mor_non_partition VALUES + (5, 1710000004000, 'purchase'), + (6, 1710000005000, 'search'); +INSERT INTO user_activity_log_mor_non_partition VALUES + (7, 1710000006000, 'add_to_cart'), + (8, 1710000007000, 'remove_from_cart'); +INSERT INTO user_activity_log_mor_non_partition VALUES + (9, 1710000008000, 'share'), + (10, 1710000009000, 'comment'); + 
diff --git a/docker/thirdparties/docker-compose/hudi/scripts/create_preinstalled_scripts/hudi/03_create_orc_tables.sql b/docker/thirdparties/docker-compose/hudi/scripts/create_preinstalled_scripts/hudi/03_create_orc_tables.sql new file mode 100644 index 00000000000000..0160856c2a52c6 --- /dev/null +++ b/docker/thirdparties/docker-compose/hudi/scripts/create_preinstalled_scripts/hudi/03_create_orc_tables.sql @@ -0,0 +1,49 @@ +-- Create ORC format Hudi tables +USE regression_hudi; + +-- Drop existing tables if they exist +DROP TABLE IF EXISTS orc_hudi_table_cow; +DROP TABLE IF EXISTS orc_hudi_table_mor; + +CREATE TABLE IF NOT EXISTS orc_hudi_table_cow ( + id BIGINT, + name STRING, + value DOUBLE +) USING hudi +TBLPROPERTIES ( + type = 'cow', + primaryKey = 'id', + hoodie.base.file.format = 'orc', + hoodie.metadata.enable = 'false', + hoodie.datasource.hive_sync.enable = 'true', + hoodie.datasource.hive_sync.metastore.uris = '${HIVE_METASTORE_URIS}', + hoodie.datasource.hive_sync.mode = 'hms' +) +LOCATION 's3a://${HUDI_BUCKET}/warehouse/regression_hudi/orc_hudi_table_cow'; + +CREATE TABLE IF NOT EXISTS orc_hudi_table_mor ( + id BIGINT, + name STRING, + value DOUBLE +) USING hudi +TBLPROPERTIES ( + type = 'mor', + primaryKey = 'id', + hoodie.base.file.format = 'orc', + hoodie.compact.inline = 'true', + hoodie.compact.inline.max.delta.commits = '1', + hoodie.metadata.enable = 'false', + hoodie.datasource.hive_sync.enable = 'true', + hoodie.datasource.hive_sync.metastore.uris = '${HIVE_METASTORE_URIS}', + hoodie.datasource.hive_sync.mode = 'hms' +) +LOCATION 's3a://${HUDI_BUCKET}/warehouse/regression_hudi/orc_hudi_table_mor'; + +INSERT INTO orc_hudi_table_cow VALUES + (1, 'test1', 10.5), + (2, 'test2', 20.5); + +INSERT INTO orc_hudi_table_mor VALUES + (1, 'test1', 10.5), + (2, 'test2', 20.5); + diff --git a/docker/thirdparties/docker-compose/hudi/scripts/create_preinstalled_scripts/hudi/04_create_time_travel_tables.sql 
b/docker/thirdparties/docker-compose/hudi/scripts/create_preinstalled_scripts/hudi/04_create_time_travel_tables.sql new file mode 100644 index 00000000000000..ad3aa5e6939c9c --- /dev/null +++ b/docker/thirdparties/docker-compose/hudi/scripts/create_preinstalled_scripts/hudi/04_create_time_travel_tables.sql @@ -0,0 +1,48 @@ +-- Create time travel test tables +USE regression_hudi; + +-- Drop existing tables if they exist +DROP TABLE IF EXISTS timetravel_cow; +DROP TABLE IF EXISTS timetravel_mor; + +CREATE TABLE IF NOT EXISTS timetravel_cow ( + id BIGINT, + name STRING, + value DOUBLE +) USING hudi +TBLPROPERTIES ( + type = 'cow', + primaryKey = 'id', + hoodie.metadata.enable = 'false', + hoodie.datasource.hive_sync.enable = 'true', + hoodie.datasource.hive_sync.metastore.uris = '${HIVE_METASTORE_URIS}', + hoodie.datasource.hive_sync.mode = 'hms' +) +LOCATION 's3a://${HUDI_BUCKET}/warehouse/regression_hudi/timetravel_cow'; + +CREATE TABLE IF NOT EXISTS timetravel_mor ( + id BIGINT, + name STRING, + value DOUBLE +) USING hudi +TBLPROPERTIES ( + type = 'mor', + primaryKey = 'id', + hoodie.compact.inline = 'true', + hoodie.compact.inline.max.delta.commits = '1', + hoodie.metadata.enable = 'false', + hoodie.datasource.hive_sync.enable = 'true', + hoodie.datasource.hive_sync.metastore.uris = '${HIVE_METASTORE_URIS}', + hoodie.datasource.hive_sync.mode = 'hms' +) +LOCATION 's3a://${HUDI_BUCKET}/warehouse/regression_hudi/timetravel_mor'; + +-- Insert initial data for time travel testing +INSERT INTO timetravel_cow VALUES + (1, 'initial', 100.0), + (2, 'initial', 200.0); + +INSERT INTO timetravel_mor VALUES + (1, 'initial', 100.0), + (2, 'initial', 200.0); + diff --git a/docker/thirdparties/docker-compose/hudi/scripts/create_preinstalled_scripts/hudi/05_create_partition_tables.sql b/docker/thirdparties/docker-compose/hudi/scripts/create_preinstalled_scripts/hudi/05_create_partition_tables.sql new file mode 100644 index 00000000000000..5d3ed1982091f4 --- /dev/null +++ 
b/docker/thirdparties/docker-compose/hudi/scripts/create_preinstalled_scripts/hudi/05_create_partition_tables.sql @@ -0,0 +1,94 @@ +-- Create partition tables for partition pruning tests +USE regression_hudi; + +-- Drop existing tables if they exist +DROP TABLE IF EXISTS one_partition_tb; +DROP TABLE IF EXISTS two_partition_tb; +DROP TABLE IF EXISTS three_partition_tb; + +-- One partition table +CREATE TABLE IF NOT EXISTS one_partition_tb ( + id BIGINT, + name STRING, + part1 INT +) USING hudi +TBLPROPERTIES ( + type = 'cow', + primaryKey = 'id', + hoodie.metadata.enable = 'false', + hoodie.datasource.hive_sync.enable = 'true', + hoodie.datasource.hive_sync.metastore.uris = '${HIVE_METASTORE_URIS}', + hoodie.datasource.hive_sync.mode = 'hms' +) +PARTITIONED BY (part1) +LOCATION 's3a://${HUDI_BUCKET}/warehouse/regression_hudi/one_partition_tb'; + +-- Two partition table +CREATE TABLE IF NOT EXISTS two_partition_tb ( + id BIGINT, + name STRING, + part1 STRING, + part2 INT +) USING hudi +TBLPROPERTIES ( + type = 'cow', + primaryKey = 'id', + hoodie.metadata.enable = 'false', + hoodie.datasource.hive_sync.enable = 'true', + hoodie.datasource.hive_sync.metastore.uris = '${HIVE_METASTORE_URIS}', + hoodie.datasource.hive_sync.mode = 'hms' +) +PARTITIONED BY (part1, part2) +LOCATION 's3a://${HUDI_BUCKET}/warehouse/regression_hudi/two_partition_tb'; + +-- Three partition table +CREATE TABLE IF NOT EXISTS three_partition_tb ( + id BIGINT, + name STRING, + part1 STRING, + part2 INT, + part3 STRING +) USING hudi +TBLPROPERTIES ( + type = 'cow', + primaryKey = 'id', + hoodie.metadata.enable = 'false', + hoodie.datasource.hive_sync.enable = 'true', + hoodie.datasource.hive_sync.metastore.uris = '${HIVE_METASTORE_URIS}', + hoodie.datasource.hive_sync.mode = 'hms' +) +PARTITIONED BY (part1, part2, part3) +LOCATION 's3a://${HUDI_BUCKET}/warehouse/regression_hudi/three_partition_tb'; + +-- Insert data +INSERT INTO one_partition_tb VALUES + (1, 'name1', 2024), + (2, 'name2', 2024), + 
 (3, 'name3', 2025), + (4, 'name4', 2025), + (5, 'name5', 2024); + +INSERT INTO two_partition_tb VALUES + (1, 'name1', 'US', 1), + (2, 'name2', 'US', 1), + (3, 'name3', 'US', 2), + (4, 'name4', 'US', 2), + (5, 'name5', 'EU', 1), + (6, 'name6', 'EU', 2), + (7, 'name7', 'EU', 2), + (8, 'name8', 'EU', 2); + +INSERT INTO three_partition_tb VALUES + (1, 'name1', 'US', 2024, 'Q1'), + (2, 'name2', 'US', 2024, 'Q1'), + (3, 'name3', 'US', 2024, 'Q2'), + (4, 'name4', 'US', 2024, 'Q2'), + (5, 'name5', 'US', 2025, 'Q1'), + (6, 'name6', 'US', 2025, 'Q2'), + (7, 'name7', 'EU', 2024, 'Q1'), + (8, 'name8', 'EU', 2024, 'Q2'), + (9, 'name9', 'EU', 2025, 'Q1'), + (10, 'name10', 'EU', 2025, 'Q2'), + (11, 'name11', 'AS', 2025, 'Q1'), + (12, 'name12', 'AS', 2025, 'Q2'); + diff --git a/docker/thirdparties/docker-compose/hudi/scripts/create_preinstalled_scripts/hudi/06_create_type_partition_tables.sql b/docker/thirdparties/docker-compose/hudi/scripts/create_preinstalled_scripts/hudi/06_create_type_partition_tables.sql new file mode 100644 index 00000000000000..89be65f1779681 --- /dev/null +++ b/docker/thirdparties/docker-compose/hudi/scripts/create_preinstalled_scripts/hudi/06_create_type_partition_tables.sql @@ -0,0 +1,169 @@ +-- Create partition tables with different partition column types. NOTE(review): this script drops and recreates one_partition_tb, two_partition_tb and three_partition_tb, which 05_create_partition_tables.sql has just created with a different schema, TBLPROPERTIES and data -- the 05 tables are clobbered; rename the tables here or merge the two scripts if both data sets are needed. +use regression_hudi; + +DROP TABLE IF EXISTS one_partition_tb; +CREATE TABLE one_partition_tb ( + id INT, + name string +) +USING HUDI +PARTITIONED BY (part1 INT); +INSERT INTO one_partition_tb PARTITION (part1=2024) VALUES (1, 'Alice'); +INSERT INTO one_partition_tb PARTITION (part1=2024) VALUES (2, 'Bob'); +INSERT INTO one_partition_tb PARTITION (part1=2024) VALUES (3, 'Charlie'); +INSERT INTO one_partition_tb PARTITION (part1=2025) VALUES (4, 'David'); +INSERT INTO one_partition_tb PARTITION (part1=2025) VALUES (5, 'Eva'); + +DROP TABLE IF EXISTS two_partition_tb; +CREATE TABLE two_partition_tb ( + id INT, + name string +) +USING HUDI +PARTITIONED BY (part1 STRING, part2 int); +INSERT INTO 
two_partition_tb PARTITION (part1='US', part2=1) VALUES (1, 'Alice'); +INSERT INTO two_partition_tb PARTITION (part1='US', part2=1) VALUES (2, 'Bob'); +INSERT INTO two_partition_tb PARTITION (part1='US', part2=1) VALUES (3, 'Charlie'); +INSERT INTO two_partition_tb PARTITION (part1='US', part2=2) VALUES (4, 'David'); +INSERT INTO two_partition_tb PARTITION (part1='US', part2=2) VALUES (5, 'Eva'); +INSERT INTO two_partition_tb PARTITION (part1='EU', part2=1) VALUES (6, 'Frank'); +INSERT INTO two_partition_tb PARTITION (part1='EU', part2=1) VALUES (7, 'Grace'); +INSERT INTO two_partition_tb PARTITION (part1='EU', part2=2) VALUES (8, 'Hannah'); +INSERT INTO two_partition_tb PARTITION (part1='EU', part2=2) VALUES (9, 'Ivy'); +INSERT INTO two_partition_tb PARTITION (part1='EU', part2=2) VALUES (10, 'Jack'); + +DROP TABLE IF EXISTS three_partition_tb; +CREATE TABLE three_partition_tb ( + id INT, + name string +) +USING HUDI +PARTITIONED BY (part1 STRING, part2 INT, part3 STRING); +INSERT INTO three_partition_tb PARTITION (part1='US', part2=2024, part3='Q1') VALUES (1, 'Alice'); +INSERT INTO three_partition_tb PARTITION (part1='US', part2=2024, part3='Q1') VALUES (2, 'Bob'); +INSERT INTO three_partition_tb PARTITION (part1='US', part2=2024, part3='Q1') VALUES (3, 'Charlie'); +INSERT INTO three_partition_tb PARTITION (part1='US', part2=2024, part3='Q2') VALUES (4, 'David'); +INSERT INTO three_partition_tb PARTITION (part1='US', part2=2024, part3='Q2') VALUES (5, 'Eva'); +INSERT INTO three_partition_tb PARTITION (part1='US', part2=2025, part3='Q1') VALUES (6, 'Frank'); +INSERT INTO three_partition_tb PARTITION (part1='US', part2=2025, part3='Q2') VALUES (7, 'Grace'); +INSERT INTO three_partition_tb PARTITION (part1='EU', part2=2024, part3='Q1') VALUES (8, 'Hannah'); +INSERT INTO three_partition_tb PARTITION (part1='EU', part2=2024, part3='Q1') VALUES (9, 'Ivy'); +INSERT INTO three_partition_tb PARTITION (part1='EU', part2=2025, part3='Q2') VALUES (10, 'Jack'); +INSERT INTO 
three_partition_tb PARTITION (part1='EU', part2=2025, part3='Q2') VALUES (11, 'Leo'); +INSERT INTO three_partition_tb PARTITION (part1='EU', part2=2025, part3='Q3') VALUES (12, 'Mia'); +INSERT INTO three_partition_tb PARTITION (part1='AS', part2=2025, part3='Q1') VALUES (13, 'Nina'); +INSERT INTO three_partition_tb PARTITION (part1='AS', part2=2025, part3='Q2') VALUES (14, 'Oscar'); +INSERT INTO three_partition_tb PARTITION (part1='AS', part2=2025, part3='Q3') VALUES (15, 'Paul'); + +-- partition pruning with different data types +-- boolean +DROP TABLE IF EXISTS boolean_partition_tb; +CREATE TABLE boolean_partition_tb ( + id INT, + name STRING +) +USING HUDI +PARTITIONED BY (part1 BOOLEAN); + +INSERT INTO boolean_partition_tb PARTITION (part1=true) VALUES (1, 'Alice'); +INSERT INTO boolean_partition_tb PARTITION (part1=true) VALUES (2, 'Bob'); +INSERT INTO boolean_partition_tb PARTITION (part1=false) VALUES (3, 'Charlie'); +INSERT INTO boolean_partition_tb PARTITION (part1=false) VALUES (4, 'David'); + +-- tinyint +DROP TABLE IF EXISTS tinyint_partition_tb; +CREATE TABLE tinyint_partition_tb ( + id INT, + name STRING +) +USING HUDI +PARTITIONED BY (part1 TINYINT); + +INSERT INTO tinyint_partition_tb PARTITION (part1=1) VALUES (1, 'Alice'); +INSERT INTO tinyint_partition_tb PARTITION (part1=1) VALUES (2, 'Bob'); +INSERT INTO tinyint_partition_tb PARTITION (part1=2) VALUES (3, 'Charlie'); +INSERT INTO tinyint_partition_tb PARTITION (part1=2) VALUES (4, 'David'); + +-- smallint +DROP TABLE IF EXISTS smallint_partition_tb; +CREATE TABLE smallint_partition_tb ( + id INT, + name STRING +) +USING HUDI +PARTITIONED BY (part1 SMALLINT); + +INSERT INTO smallint_partition_tb PARTITION (part1=10) VALUES (1, 'Alice'); +INSERT INTO smallint_partition_tb PARTITION (part1=10) VALUES (2, 'Bob'); +INSERT INTO smallint_partition_tb PARTITION (part1=20) VALUES (3, 'Charlie'); +INSERT INTO smallint_partition_tb PARTITION (part1=20) VALUES (4, 'David'); + +-- int +DROP TABLE IF EXISTS 
int_partition_tb; +CREATE TABLE int_partition_tb ( + id INT, + name STRING +) +USING HUDI +PARTITIONED BY (part1 INT); + +INSERT INTO int_partition_tb PARTITION (part1=100) VALUES (1, 'Alice'); +INSERT INTO int_partition_tb PARTITION (part1=100) VALUES (2, 'Bob'); +INSERT INTO int_partition_tb PARTITION (part1=200) VALUES (3, 'Charlie'); +INSERT INTO int_partition_tb PARTITION (part1=200) VALUES (4, 'David'); + +-- bigint +DROP TABLE IF EXISTS bigint_partition_tb; +CREATE TABLE bigint_partition_tb ( + id INT, + name STRING +) +USING HUDI +PARTITIONED BY (part1 BIGINT); + +INSERT INTO bigint_partition_tb PARTITION (part1=1234567890) VALUES (1, 'Alice'); +INSERT INTO bigint_partition_tb PARTITION (part1=1234567890) VALUES (2, 'Bob'); +INSERT INTO bigint_partition_tb PARTITION (part1=9876543210) VALUES (3, 'Charlie'); +INSERT INTO bigint_partition_tb PARTITION (part1=9876543210) VALUES (4, 'David'); + +-- string +DROP TABLE IF EXISTS string_partition_tb; +CREATE TABLE string_partition_tb ( + id INT, + name STRING +) +USING HUDI +PARTITIONED BY (part1 STRING); + +INSERT INTO string_partition_tb PARTITION (part1='RegionA') VALUES (1, 'Alice'); +INSERT INTO string_partition_tb PARTITION (part1='RegionA') VALUES (2, 'Bob'); +INSERT INTO string_partition_tb PARTITION (part1='RegionB') VALUES (3, 'Charlie'); +INSERT INTO string_partition_tb PARTITION (part1='RegionB') VALUES (4, 'David'); + +-- date +DROP TABLE IF EXISTS date_partition_tb; +CREATE TABLE date_partition_tb ( + id INT, + name STRING +) +USING HUDI +PARTITIONED BY (part1 DATE); + +INSERT INTO date_partition_tb PARTITION (part1=DATE '2023-12-01') VALUES (1, 'Alice'); +INSERT INTO date_partition_tb PARTITION (part1=DATE '2023-12-01') VALUES (2, 'Bob'); +INSERT INTO date_partition_tb PARTITION (part1=DATE '2024-01-01') VALUES (3, 'Charlie'); +INSERT INTO date_partition_tb PARTITION (part1=DATE '2024-01-01') VALUES (4, 'David'); + +-- timestamp +DROP TABLE IF EXISTS timestamp_partition_tb; +CREATE TABLE 
timestamp_partition_tb ( + id INT, + name STRING +) +USING HUDI +PARTITIONED BY (part1 TIMESTAMP); + +INSERT INTO timestamp_partition_tb PARTITION (part1=TIMESTAMP '2023-12-01 08:00:00') VALUES (1, 'Alice'); +INSERT INTO timestamp_partition_tb PARTITION (part1=TIMESTAMP '2023-12-01 08:00:00') VALUES (2, 'Bob'); +INSERT INTO timestamp_partition_tb PARTITION (part1=TIMESTAMP '2024-01-01 10:00:00') VALUES (3, 'Charlie'); +INSERT INTO timestamp_partition_tb PARTITION (part1=TIMESTAMP '2024-01-01 10:00:00') VALUES (4, 'David'); \ No newline at end of file diff --git a/docker/thirdparties/docker-compose/hudi/scripts/create_preinstalled_scripts/hudi/07_create_schema_change_tables.sql b/docker/thirdparties/docker-compose/hudi/scripts/create_preinstalled_scripts/hudi/07_create_schema_change_tables.sql new file mode 100644 index 00000000000000..ca29a5d754f791 --- /dev/null +++ b/docker/thirdparties/docker-compose/hudi/scripts/create_preinstalled_scripts/hudi/07_create_schema_change_tables.sql @@ -0,0 +1,169 @@ +-- Create schema change test tables +-- These tables are used to test schema evolution capabilities +-- Reference: test_hudi_schema_change.groovy (p2) for the complete schema evolution process +-- Reference: https://hudi.apache.org/docs/schema_evolution +USE regression_hudi; + +-- Set configuration to allow DROP COLUMN and RENAME COLUMN operations +-- According to Hudi docs, when using hive metastore, disable this config if encountering +-- "The following columns have types incompatible with the existing columns in their respective positions" +SET spark.hadoop.hive.metastore.disallow.incompatible.col.type.changes=false; +SET hoodie.schema.on.read.enable=true; +SET hoodie.metadata.enable=false; +SET hoodie.parquet.small.file.limit=100; + +-- Drop existing tables if they exist +DROP TABLE IF EXISTS hudi_sc_orc_cow; +DROP TABLE IF EXISTS hudi_sc_parquet_cow; + +-- ============================================================================ +-- hudi_sc_orc_cow table - 
Complete schema evolution process +-- ============================================================================ + +-- Step 1: Create table with initial schema (id, name, age) +CREATE TABLE IF NOT EXISTS hudi_sc_orc_cow ( + id INT, + name STRING, + age INT +) USING hudi +OPTIONS ( + type = 'cow', + primaryKey = 'id', + hoodie.base.file.format = 'orc', + hoodie.schema.on.read.enable = 'true', + hoodie.metadata.enable = 'false', + hoodie.parquet.small.file.limit = '100', + hoodie.datasource.hive_sync.enable = 'true', + hoodie.datasource.hive_sync.metastore.uris = '${HIVE_METASTORE_URIS}', + hoodie.datasource.hive_sync.mode = 'hms' +) +LOCATION 's3a://${HUDI_BUCKET}/warehouse/regression_hudi/hudi_sc_orc_cow'; + +-- Step 2: Insert initial data +INSERT INTO hudi_sc_orc_cow VALUES (1, 'Alice', 25); +INSERT INTO hudi_sc_orc_cow VALUES (2, 'Bob', 30); + +-- Step 3: Add city column (schema: id, name, age, city) +ALTER TABLE hudi_sc_orc_cow ADD COLUMNS (city STRING); +INSERT INTO hudi_sc_orc_cow VALUES (3, 'Charlie', 28, 'New York'); + +-- Step 4: Drop age column (schema: id, name, city) +-- Note: DROP COLUMN operation is disabled in current version +-- ALTER TABLE hudi_sc_orc_cow DROP COLUMN age; +-- Schema remains: id, name, age, city +INSERT INTO hudi_sc_orc_cow VALUES (4, 'David', 35, 'Los Angeles'); + +-- Step 5: Rename name to full_name (schema: id, full_name, city) +-- Note: RENAME COLUMN operation is disabled in current version +-- ALTER TABLE hudi_sc_orc_cow RENAME COLUMN name TO full_name; +-- Schema remains: id, name, age, city +INSERT INTO hudi_sc_orc_cow VALUES (5, 'Eve', 28, 'Chicago'); + +-- Step 6: Add score column (schema: id, name, age, city, score) +-- Note: Column position (AFTER id) may not be supported, adding at the end +ALTER TABLE hudi_sc_orc_cow ADD COLUMNS (score FLOAT); +INSERT INTO hudi_sc_orc_cow VALUES (6, 'Frank', 32, 'San Francisco', 85.5); + +-- Step 7: Change city position to after id (schema: id, city, score, full_name) +-- Note: REORDER 
operation (ALTER COLUMN ... AFTER) is disabled in current version +-- ALTER TABLE hudi_sc_orc_cow ALTER COLUMN city AFTER id; +-- Schema remains: id, name, age, city, score +INSERT INTO hudi_sc_orc_cow VALUES (7, 'Grace', 29, 'Seattle', 90.0); + +-- Step 8: Change score type from float to double +-- According to Hudi docs: ALTER TABLE tableName ALTER COLUMN column_name TYPE type +ALTER TABLE hudi_sc_orc_cow ALTER COLUMN score TYPE DOUBLE; +INSERT INTO hudi_sc_orc_cow VALUES (8, 'Heidi', 31, 'Portland', 95.5); + +-- Step 9: Rename city to location (schema: id, location, score, full_name) +-- Note: RENAME COLUMN operation is disabled in current version +-- ALTER TABLE hudi_sc_orc_cow RENAME COLUMN city TO location; +-- Schema remains: id, name, age, city, score +INSERT INTO hudi_sc_orc_cow VALUES (9, 'Ivan', 26, 'Denver', 88.0); + +-- Step 10: Change location position to after full_name (schema: id, score, full_name, location) +-- Note: REORDER operation (ALTER COLUMN ... AFTER) is disabled in current version +-- ALTER TABLE hudi_sc_orc_cow ALTER COLUMN location AFTER full_name; +-- Schema remains: id, name, age, city, score +INSERT INTO hudi_sc_orc_cow VALUES (10, 'Judy', 27, 'Austin', 101.1); + +-- Step 11: Add age column (final schema: id, name, age, city, score) +-- Note: age column already exists, so this step is skipped +-- ALTER TABLE hudi_sc_orc_cow ADD COLUMN age INT; +INSERT INTO hudi_sc_orc_cow VALUES (11, 'QQ', 24, 'cn', 222.2); + +-- ============================================================================ +-- hudi_sc_parquet_cow table - Same schema evolution process +-- ============================================================================ + +-- Step 1: Create table with initial schema (id, name, age) +CREATE TABLE IF NOT EXISTS hudi_sc_parquet_cow ( + id INT, + name STRING, + age INT +) USING hudi +OPTIONS ( + type = 'cow', + primaryKey = 'id', + hoodie.base.file.format = 'parquet', + hoodie.schema.on.read.enable = 'true', + 
hoodie.metadata.enable = 'false', + hoodie.parquet.small.file.limit = '100', + hoodie.datasource.hive_sync.enable = 'true', + hoodie.datasource.hive_sync.metastore.uris = '${HIVE_METASTORE_URIS}', + hoodie.datasource.hive_sync.mode = 'hms' +) +LOCATION 's3a://${HUDI_BUCKET}/warehouse/regression_hudi/hudi_sc_parquet_cow'; + +-- Step 2: Insert initial data +INSERT INTO hudi_sc_parquet_cow VALUES (1, 'Alice', 25); +INSERT INTO hudi_sc_parquet_cow VALUES (2, 'Bob', 30); + +-- Step 3: Add city column (schema: id, name, age, city) +ALTER TABLE hudi_sc_parquet_cow ADD COLUMNS (city STRING); +INSERT INTO hudi_sc_parquet_cow VALUES (3, 'Charlie', 28, 'New York'); + +-- Step 4: Drop age column (schema: id, name, city) +-- Note: DROP COLUMN operation is disabled in current version +-- ALTER TABLE hudi_sc_parquet_cow DROP COLUMN age; +-- Schema remains: id, name, age, city +INSERT INTO hudi_sc_parquet_cow VALUES (4, 'David', 35, 'Los Angeles'); + +-- Step 5: Rename name to full_name (schema: id, full_name, city) +-- Note: RENAME COLUMN operation is disabled in current version +-- ALTER TABLE hudi_sc_parquet_cow RENAME COLUMN name TO full_name; +-- Schema remains: id, name, age, city +INSERT INTO hudi_sc_parquet_cow VALUES (5, 'Eve', 28, 'Chicago'); + +-- Step 6: Add score column (schema: id, name, age, city, score) +-- Note: Column position (AFTER id) may not be supported, adding at the end +ALTER TABLE hudi_sc_parquet_cow ADD COLUMNS (score FLOAT); +INSERT INTO hudi_sc_parquet_cow VALUES (6, 'Frank', 32, 'San Francisco', 85.5); + +-- Step 7: Change city position to after id (schema: id, city, score, full_name) +-- Note: REORDER operation (ALTER COLUMN ... 
AFTER) is disabled in current version +-- ALTER TABLE hudi_sc_parquet_cow ALTER COLUMN city AFTER id; +-- Schema remains: id, name, age, city, score +INSERT INTO hudi_sc_parquet_cow VALUES (7, 'Grace', 29, 'Seattle', 90.0); + +-- Step 8: Change score type from float to double +-- According to Hudi docs: ALTER TABLE tableName ALTER COLUMN column_name TYPE type +ALTER TABLE hudi_sc_parquet_cow ALTER COLUMN score TYPE DOUBLE; +INSERT INTO hudi_sc_parquet_cow VALUES (8, 'Heidi', 31, 'Portland', 95.5); + +-- Step 9: Rename city to location (schema: id, location, score, full_name) +-- Note: RENAME COLUMN operation is disabled in current version +-- ALTER TABLE hudi_sc_parquet_cow RENAME COLUMN city TO location; +-- Schema remains: id, name, age, city, score +INSERT INTO hudi_sc_parquet_cow VALUES (9, 'Ivan', 26, 'Denver', 88.0); + +-- Step 10: Change location position to after full_name (schema: id, score, full_name, location) +-- Note: REORDER operation (ALTER COLUMN ... AFTER) is disabled in current version +-- ALTER TABLE hudi_sc_parquet_cow ALTER COLUMN location AFTER full_name; +-- Schema remains: id, name, age, city, score +INSERT INTO hudi_sc_parquet_cow VALUES (10, 'Judy', 27, 'Austin', 101.1); + +-- Step 11: Add age column (final schema: id, name, age, city, score) +-- Note: age column already exists, so this step is skipped +-- ALTER TABLE hudi_sc_parquet_cow ADD COLUMN age INT; +INSERT INTO hudi_sc_parquet_cow VALUES (11, 'QQ', 24, 'cn', 222.2); diff --git a/docker/thirdparties/docker-compose/hudi/scripts/create_preinstalled_scripts/hudi/08_create_full_schema_change_tables.sql b/docker/thirdparties/docker-compose/hudi/scripts/create_preinstalled_scripts/hudi/08_create_full_schema_change_tables.sql new file mode 100644 index 00000000000000..e2b22a9ef0ee5a --- /dev/null +++ b/docker/thirdparties/docker-compose/hudi/scripts/create_preinstalled_scripts/hudi/08_create_full_schema_change_tables.sql @@ -0,0 +1,151 @@ +-- Create full schema change test tables with 
complex types (map, struct, array) +-- These tables are used to test complex schema evolution scenarios +-- Tables are created with initial schema, then ALTER TABLE is used to add complex type columns +USE regression_hudi; + +-- Drop existing tables if they exist +DROP TABLE IF EXISTS hudi_full_schema_change_parquet; +DROP TABLE IF EXISTS hudi_full_schema_change_orc; + +-- Create hudi_full_schema_change_parquet table with initial schema (only id) +CREATE TABLE IF NOT EXISTS hudi_full_schema_change_parquet ( + id INT +) USING hudi +OPTIONS ( + type = 'cow', + primaryKey = 'id', + hoodie.schema.on.read.enable = 'true', + hoodie.metadata.enable = 'false', + hoodie.parquet.small.file.limit = '100', + hoodie.datasource.hive_sync.enable = 'true', + hoodie.datasource.hive_sync.metastore.uris = '${HIVE_METASTORE_URIS}', + hoodie.datasource.hive_sync.mode = 'hms' +) +LOCATION 's3a://${HUDI_BUCKET}/warehouse/regression_hudi/hudi_full_schema_change_parquet'; + +-- Insert initial data (only id) +INSERT INTO hudi_full_schema_change_parquet (id) VALUES + (0), (1), (2), (3), (4), (5), (6), (7), (8), (9), (10), (11), (12), (13), (14), (15), (16), (17), (18), (19), (20), (21); + +-- Execute schema changes: Add complex type columns step by step +-- NOTE(review): type parameters below were reconstructed from the named_struct()/map()/array() literals in the INSERTs that follow — confirm against the original script +ALTER TABLE hudi_full_schema_change_parquet ADD COLUMNS ( + new_map_column MAP<STRING, STRUCT<full_name: STRING, age: INT, gender: STRING>>, + struct_column STRUCT<country: STRING, city: STRING, population: INT>, + array_column ARRAY<STRUCT<item: STRING, quantity: INT, category: STRING>> +); + +-- Insert data with new columns +INSERT INTO hudi_full_schema_change_parquet VALUES + (0, map('person0', named_struct('full_name', 'zero', 'age', 2, 'gender', null)), named_struct('country', null, 'city', 'cn', 'population', 1000000), array(named_struct('item', 'Apple', 'quantity', null, 'category', null), named_struct('item', 'Banana', 'quantity', null, 'category', null))), + (1, map('person1', named_struct('full_name', 'Alice', 'age', 25, 'gender', null)), named_struct('country', null, 'city', 'New York', 'population', 8000000), array(named_struct('item', 'Apple', 'quantity', null, 'category', null), 
named_struct('item', 'Banana', 'quantity', null, 'category', null))), + (2, map('person2', named_struct('full_name', 'Bob', 'age', 30, 'gender', null)), named_struct('country', null, 'city', 'Los Angeles', 'population', 4000000), array(named_struct('item', 'Orange', 'quantity', null, 'category', null), named_struct('item', 'Grape', 'quantity', null, 'category', null))), + (3, map('person3', named_struct('full_name', 'Charlie', 'age', 28, 'gender', null)), named_struct('country', null, 'city', 'Chicago', 'population', 2700000), array(named_struct('item', 'Pear', 'quantity', null, 'category', null), named_struct('item', 'Mango', 'quantity', null, 'category', null))), + (4, map('person4', named_struct('full_name', 'David', 'age', 35, 'gender', null)), named_struct('country', null, 'city', 'Houston', 'population', 2300000), array(named_struct('item', 'Kiwi', 'quantity', null, 'category', null), named_struct('item', 'Pineapple', 'quantity', null, 'category', null))), + (5, map('person5', named_struct('full_name', 'Eve', 'age', 40, 'gender', null)), named_struct('country', 'USA', 'city', 'Phoenix', 'population', 1600000), array(named_struct('item', 'Lemon', 'quantity', null, 'category', null), named_struct('item', 'Lime', 'quantity', null, 'category', null))), + (6, map('person6', named_struct('full_name', 'Frank', 'age', 22, 'gender', null)), named_struct('country', 'USA', 'city', 'Philadelphia', 'population', 1500000), array(named_struct('item', 'Watermelon', 'quantity', null, 'category', null), named_struct('item', 'Strawberry', 'quantity', null, 'category', null))), + (7, map('person7', named_struct('full_name', 'Grace', 'age', 27, 'gender', null)), named_struct('country', 'USA', 'city', 'San Antonio', 'population', 1500000), array(named_struct('item', 'Blueberry', 'quantity', null, 'category', null), named_struct('item', 'Raspberry', 'quantity', null, 'category', null))), + (8, map('person8', named_struct('full_name', 'Hank', 'age', 32, 'gender', null)), 
named_struct('country', 'USA', 'city', 'San Diego', 'population', 1400000), array(named_struct('item', 'Cherry', 'quantity', 5, 'category', null), named_struct('item', 'Plum', 'quantity', 3, 'category', null))), + (9, map('person9', named_struct('full_name', 'Ivy', 'age', 29, 'gender', null)), named_struct('country', 'USA', 'city', 'Dallas', 'population', 1300000), array(named_struct('item', 'Peach', 'quantity', 4, 'category', null), named_struct('item', 'Apricot', 'quantity', 2, 'category', null))), + (10, map('person10', named_struct('full_name', 'Jack', 'age', 26, 'gender', null)), named_struct('country', 'USA', 'city', 'Austin', 'population', 950000), array(named_struct('item', 'Fig', 'quantity', 6, 'category', null), named_struct('item', 'Date', 'quantity', 7, 'category', null))), + (11, map('person11', named_struct('full_name', 'Karen', 'age', 31, 'gender', 'Female')), named_struct('country', 'USA', 'city', 'Seattle', 'population', 750000), array(named_struct('item', 'Coconut', 'quantity', 1, 'category', null), named_struct('item', 'Papaya', 'quantity', 2, 'category', null))), + (12, map('person12', named_struct('full_name', 'Leo', 'age', 24, 'gender', 'Male')), named_struct('country', 'USA', 'city', 'Portland', 'population', 650000), array(named_struct('item', 'Guava', 'quantity', 3, 'category', null), named_struct('item', 'Lychee', 'quantity', 4, 'category', null))), + (13, map('person13', named_struct('full_name', 'Mona', 'age', 33, 'gender', 'Female')), named_struct('country', 'USA', 'city', 'Denver', 'population', 700000), array(named_struct('item', 'Avocado', 'quantity', 2, 'category', 'Fruit'), named_struct('item', 'Tomato', 'quantity', 5, 'category', 'Vegetable'))), + (14, map('person14', named_struct('full_name', 'Nina', 'age', 28, 'gender', 'Female')), named_struct('country', 'USA', 'city', 'Miami', 'population', 450000), array(named_struct('item', 'Cucumber', 'quantity', 6, 'category', 'Vegetable'), named_struct('item', 'Carrot', 'quantity', 7, 
'category', 'Vegetable'))), + (15, map('person15', named_struct('full_name', 'Emma Smith', 'age', 30, 'gender', 'Female')), named_struct('country', 'USA', 'city', 'New York', 'population', 8000000), array(named_struct('item', 'Banana', 'quantity', 3, 'category', 'Fruit'), named_struct('item', 'Potato', 'quantity', 8, 'category', 'Vegetable'))); + +-- Add struct_column2 +ALTER TABLE hudi_full_schema_change_parquet ADD COLUMNS ( + struct_column2 STRUCT, new_a: STRUCT, c: INT> +); + +-- Update existing records (id 0-15) to include struct_column2 as NULL +-- This ensures all records have the same schema structure +INSERT INTO hudi_full_schema_change_parquet VALUES + (0, map('person0', named_struct('full_name', 'zero', 'age', 2, 'gender', null)), named_struct('country', null, 'city', 'cn', 'population', 1000000), array(named_struct('item', 'Apple', 'quantity', null, 'category', null), named_struct('item', 'Banana', 'quantity', null, 'category', null)), CAST(NULL AS STRUCT, new_a: STRUCT, c: INT>)), + (1, map('person1', named_struct('full_name', 'Alice', 'age', 25, 'gender', null)), named_struct('country', null, 'city', 'New York', 'population', 8000000), array(named_struct('item', 'Apple', 'quantity', null, 'category', null), named_struct('item', 'Banana', 'quantity', null, 'category', null)), CAST(NULL AS STRUCT, new_a: STRUCT, c: INT>)), + (2, map('person2', named_struct('full_name', 'Bob', 'age', 30, 'gender', null)), named_struct('country', null, 'city', 'Los Angeles', 'population', 4000000), array(named_struct('item', 'Orange', 'quantity', null, 'category', null), named_struct('item', 'Grape', 'quantity', null, 'category', null)), CAST(NULL AS STRUCT, new_a: STRUCT, c: INT>)), + (3, map('person3', named_struct('full_name', 'Charlie', 'age', 28, 'gender', null)), named_struct('country', null, 'city', 'Chicago', 'population', 2700000), array(named_struct('item', 'Pear', 'quantity', null, 'category', null), named_struct('item', 'Mango', 'quantity', null, 'category', 
null)), CAST(NULL AS STRUCT, new_a: STRUCT, c: INT>)), + (4, map('person4', named_struct('full_name', 'David', 'age', 35, 'gender', null)), named_struct('country', null, 'city', 'Houston', 'population', 2300000), array(named_struct('item', 'Kiwi', 'quantity', null, 'category', null), named_struct('item', 'Pineapple', 'quantity', null, 'category', null)), CAST(NULL AS STRUCT, new_a: STRUCT, c: INT>)), + (5, map('person5', named_struct('full_name', 'Eve', 'age', 40, 'gender', null)), named_struct('country', 'USA', 'city', 'Phoenix', 'population', 1600000), array(named_struct('item', 'Lemon', 'quantity', null, 'category', null), named_struct('item', 'Lime', 'quantity', null, 'category', null)), CAST(NULL AS STRUCT, new_a: STRUCT, c: INT>)), + (6, map('person6', named_struct('full_name', 'Frank', 'age', 22, 'gender', null)), named_struct('country', 'USA', 'city', 'Philadelphia', 'population', 1500000), array(named_struct('item', 'Watermelon', 'quantity', null, 'category', null), named_struct('item', 'Strawberry', 'quantity', null, 'category', null)), CAST(NULL AS STRUCT, new_a: STRUCT, c: INT>)), + (7, map('person7', named_struct('full_name', 'Grace', 'age', 27, 'gender', null)), named_struct('country', 'USA', 'city', 'San Antonio', 'population', 1500000), array(named_struct('item', 'Blueberry', 'quantity', null, 'category', null), named_struct('item', 'Raspberry', 'quantity', null, 'category', null)), CAST(NULL AS STRUCT, new_a: STRUCT, c: INT>)), + (8, map('person8', named_struct('full_name', 'Hank', 'age', 32, 'gender', null)), named_struct('country', 'USA', 'city', 'San Diego', 'population', 1400000), array(named_struct('item', 'Cherry', 'quantity', 5, 'category', null), named_struct('item', 'Plum', 'quantity', 3, 'category', null)), CAST(NULL AS STRUCT, new_a: STRUCT, c: INT>)), + (9, map('person9', named_struct('full_name', 'Ivy', 'age', 29, 'gender', null)), named_struct('country', 'USA', 'city', 'Dallas', 'population', 1300000), array(named_struct('item', 
'Peach', 'quantity', 4, 'category', null), named_struct('item', 'Apricot', 'quantity', 2, 'category', null)), CAST(NULL AS STRUCT, new_a: STRUCT, c: INT>)), + (10, map('person10', named_struct('full_name', 'Jack', 'age', 26, 'gender', null)), named_struct('country', 'USA', 'city', 'Austin', 'population', 950000), array(named_struct('item', 'Fig', 'quantity', 6, 'category', null), named_struct('item', 'Date', 'quantity', 7, 'category', null)), CAST(NULL AS STRUCT, new_a: STRUCT, c: INT>)), + (11, map('person11', named_struct('full_name', 'Karen', 'age', 31, 'gender', 'Female')), named_struct('country', 'USA', 'city', 'Seattle', 'population', 750000), array(named_struct('item', 'Coconut', 'quantity', 1, 'category', null), named_struct('item', 'Papaya', 'quantity', 2, 'category', null)), CAST(NULL AS STRUCT, new_a: STRUCT, c: INT>)), + (12, map('person12', named_struct('full_name', 'Leo', 'age', 24, 'gender', 'Male')), named_struct('country', 'USA', 'city', 'Portland', 'population', 650000), array(named_struct('item', 'Guava', 'quantity', 3, 'category', null), named_struct('item', 'Lychee', 'quantity', 4, 'category', null)), CAST(NULL AS STRUCT, new_a: STRUCT, c: INT>)), + (13, map('person13', named_struct('full_name', 'Mona', 'age', 33, 'gender', 'Female')), named_struct('country', 'USA', 'city', 'Denver', 'population', 700000), array(named_struct('item', 'Avocado', 'quantity', 2, 'category', 'Fruit'), named_struct('item', 'Tomato', 'quantity', 5, 'category', 'Vegetable')), CAST(NULL AS STRUCT, new_a: STRUCT, c: INT>)), + (14, map('person14', named_struct('full_name', 'Nina', 'age', 28, 'gender', 'Female')), named_struct('country', 'USA', 'city', 'Miami', 'population', 450000), array(named_struct('item', 'Cucumber', 'quantity', 6, 'category', 'Vegetable'), named_struct('item', 'Carrot', 'quantity', 7, 'category', 'Vegetable')), CAST(NULL AS STRUCT, new_a: STRUCT, c: INT>)), + (15, map('person15', named_struct('full_name', 'Emma Smith', 'age', 30, 'gender', 
'Female')), named_struct('country', 'USA', 'city', 'New York', 'population', 8000000), array(named_struct('item', 'Banana', 'quantity', 3, 'category', 'Fruit'), named_struct('item', 'Potato', 'quantity', 8, 'category', 'Vegetable')), CAST(NULL AS STRUCT, new_a: STRUCT, c: INT>)); + +-- Create hudi_full_schema_change_orc table with initial schema (only id) +CREATE TABLE IF NOT EXISTS hudi_full_schema_change_orc ( + id INT +) USING hudi +OPTIONS ( + type = 'cow', + primaryKey = 'id', + hoodie.base.file.format = 'orc', + hoodie.schema.on.read.enable = 'true', + hoodie.metadata.enable = 'false', + hoodie.datasource.hive_sync.enable = 'true', + hoodie.datasource.hive_sync.metastore.uris = '${HIVE_METASTORE_URIS}', + hoodie.datasource.hive_sync.mode = 'hms' +) +LOCATION 's3a://${HUDI_BUCKET}/warehouse/regression_hudi/hudi_full_schema_change_orc'; + +-- Insert initial data (only id) +INSERT INTO hudi_full_schema_change_orc (id) VALUES + (0), (1), (2), (3), (4), (5), (6), (7), (8), (9), (10), (11), (12), (13), (14), (15), (16), (17), (18), (19), (20), (21); + +-- Execute schema changes: Add complex type columns step by step +-- NOTE(review): type parameters below were reconstructed from the named_struct()/map()/array() literals in the INSERTs that follow — confirm against the original script +ALTER TABLE hudi_full_schema_change_orc ADD COLUMNS ( + new_map_column MAP<STRING, STRUCT<full_name: STRING, age: INT, gender: STRING>>, + struct_column STRUCT<country: STRING, city: STRING, population: INT>, + array_column ARRAY<STRUCT<item: STRING, quantity: INT, category: STRING>> +); + +-- Insert data with new columns +INSERT INTO hudi_full_schema_change_orc VALUES + (0, map('person0', named_struct('full_name', 'zero', 'age', 2, 'gender', null)), named_struct('country', null, 'city', 'cn', 'population', 1000000), array(named_struct('item', 'Apple', 'quantity', null, 'category', null), named_struct('item', 'Banana', 'quantity', null, 'category', null))), + (1, map('person1', named_struct('full_name', 'Alice', 'age', 25, 'gender', null)), named_struct('country', null, 'city', 'New York', 'population', 8000000), array(named_struct('item', 'Apple', 'quantity', null, 'category', null), named_struct('item', 'Banana', 'quantity', null, 'category', null))), + (2, map('person2', named_struct('full_name', 'Bob', 
'age', 30, 'gender', null)), named_struct('country', null, 'city', 'Los Angeles', 'population', 4000000), array(named_struct('item', 'Orange', 'quantity', null, 'category', null), named_struct('item', 'Grape', 'quantity', null, 'category', null))), + (3, map('person3', named_struct('full_name', 'Charlie', 'age', 28, 'gender', null)), named_struct('country', null, 'city', 'Chicago', 'population', 2700000), array(named_struct('item', 'Pear', 'quantity', null, 'category', null), named_struct('item', 'Mango', 'quantity', null, 'category', null))), + (4, map('person4', named_struct('full_name', 'David', 'age', 35, 'gender', null)), named_struct('country', null, 'city', 'Houston', 'population', 2300000), array(named_struct('item', 'Kiwi', 'quantity', null, 'category', null), named_struct('item', 'Pineapple', 'quantity', null, 'category', null))), + (5, map('person5', named_struct('full_name', 'Eve', 'age', 40, 'gender', null)), named_struct('country', 'USA', 'city', 'Phoenix', 'population', 1600000), array(named_struct('item', 'Lemon', 'quantity', null, 'category', null), named_struct('item', 'Lime', 'quantity', null, 'category', null))), + (6, map('person6', named_struct('full_name', 'Frank', 'age', 22, 'gender', null)), named_struct('country', 'USA', 'city', 'Philadelphia', 'population', 1500000), array(named_struct('item', 'Watermelon', 'quantity', null, 'category', null), named_struct('item', 'Strawberry', 'quantity', null, 'category', null))), + (7, map('person7', named_struct('full_name', 'Grace', 'age', 27, 'gender', null)), named_struct('country', 'USA', 'city', 'San Antonio', 'population', 1500000), array(named_struct('item', 'Blueberry', 'quantity', null, 'category', null), named_struct('item', 'Raspberry', 'quantity', null, 'category', null))), + (8, map('person8', named_struct('full_name', 'Hank', 'age', 32, 'gender', null)), named_struct('country', 'USA', 'city', 'San Diego', 'population', 1400000), array(named_struct('item', 'Cherry', 'quantity', 5, 
'category', null), named_struct('item', 'Plum', 'quantity', 3, 'category', null))), + (9, map('person9', named_struct('full_name', 'Ivy', 'age', 29, 'gender', null)), named_struct('country', 'USA', 'city', 'Dallas', 'population', 1300000), array(named_struct('item', 'Peach', 'quantity', 4, 'category', null), named_struct('item', 'Apricot', 'quantity', 2, 'category', null))), + (10, map('person10', named_struct('full_name', 'Jack', 'age', 26, 'gender', null)), named_struct('country', 'USA', 'city', 'Austin', 'population', 950000), array(named_struct('item', 'Fig', 'quantity', 6, 'category', null), named_struct('item', 'Date', 'quantity', 7, 'category', null))), + (11, map('person11', named_struct('full_name', 'Karen', 'age', 31, 'gender', 'Female')), named_struct('country', 'USA', 'city', 'Seattle', 'population', 750000), array(named_struct('item', 'Coconut', 'quantity', 1, 'category', null), named_struct('item', 'Papaya', 'quantity', 2, 'category', null))), + (12, map('person12', named_struct('full_name', 'Leo', 'age', 24, 'gender', 'Male')), named_struct('country', 'USA', 'city', 'Portland', 'population', 650000), array(named_struct('item', 'Guava', 'quantity', 3, 'category', null), named_struct('item', 'Lychee', 'quantity', 4, 'category', null))), + (13, map('person13', named_struct('full_name', 'Mona', 'age', 33, 'gender', 'Female')), named_struct('country', 'USA', 'city', 'Denver', 'population', 700000), array(named_struct('item', 'Avocado', 'quantity', 2, 'category', 'Fruit'), named_struct('item', 'Tomato', 'quantity', 5, 'category', 'Vegetable'))), + (14, map('person14', named_struct('full_name', 'Nina', 'age', 28, 'gender', 'Female')), named_struct('country', 'USA', 'city', 'Miami', 'population', 450000), array(named_struct('item', 'Cucumber', 'quantity', 6, 'category', 'Vegetable'), named_struct('item', 'Carrot', 'quantity', 7, 'category', 'Vegetable'))), + (15, map('person15', named_struct('full_name', 'Emma Smith', 'age', 30, 'gender', 'Female')), 
named_struct('country', 'USA', 'city', 'New York', 'population', 8000000), array(named_struct('item', 'Banana', 'quantity', 3, 'category', 'Fruit'), named_struct('item', 'Potato', 'quantity', 8, 'category', 'Vegetable'))); + +-- Add struct_column2 +ALTER TABLE hudi_full_schema_change_orc ADD COLUMNS ( + struct_column2 STRUCT, new_a: STRUCT, c: INT> +); + +-- Update existing records (id 0-15) to include struct_column2 as NULL +-- This ensures all records have the same schema structure +INSERT INTO hudi_full_schema_change_orc VALUES + (0, map('person0', named_struct('full_name', 'zero', 'age', 2, 'gender', null)), named_struct('country', null, 'city', 'cn', 'population', 1000000), array(named_struct('item', 'Apple', 'quantity', null, 'category', null), named_struct('item', 'Banana', 'quantity', null, 'category', null)), CAST(NULL AS STRUCT, new_a: STRUCT, c: INT>)), + (1, map('person1', named_struct('full_name', 'Alice', 'age', 25, 'gender', null)), named_struct('country', null, 'city', 'New York', 'population', 8000000), array(named_struct('item', 'Apple', 'quantity', null, 'category', null), named_struct('item', 'Banana', 'quantity', null, 'category', null)), CAST(NULL AS STRUCT, new_a: STRUCT, c: INT>)), + (2, map('person2', named_struct('full_name', 'Bob', 'age', 30, 'gender', null)), named_struct('country', null, 'city', 'Los Angeles', 'population', 4000000), array(named_struct('item', 'Orange', 'quantity', null, 'category', null), named_struct('item', 'Grape', 'quantity', null, 'category', null)), CAST(NULL AS STRUCT, new_a: STRUCT, c: INT>)), + (3, map('person3', named_struct('full_name', 'Charlie', 'age', 28, 'gender', null)), named_struct('country', null, 'city', 'Chicago', 'population', 2700000), array(named_struct('item', 'Pear', 'quantity', null, 'category', null), named_struct('item', 'Mango', 'quantity', null, 'category', null)), CAST(NULL AS STRUCT, new_a: STRUCT, c: INT>)), + (4, map('person4', named_struct('full_name', 'David', 'age', 35, 'gender', 
null)), named_struct('country', null, 'city', 'Houston', 'population', 2300000), array(named_struct('item', 'Kiwi', 'quantity', null, 'category', null), named_struct('item', 'Pineapple', 'quantity', null, 'category', null)), CAST(NULL AS STRUCT, new_a: STRUCT, c: INT>)), + (5, map('person5', named_struct('full_name', 'Eve', 'age', 40, 'gender', null)), named_struct('country', 'USA', 'city', 'Phoenix', 'population', 1600000), array(named_struct('item', 'Lemon', 'quantity', null, 'category', null), named_struct('item', 'Lime', 'quantity', null, 'category', null)), CAST(NULL AS STRUCT, new_a: STRUCT, c: INT>)), + (6, map('person6', named_struct('full_name', 'Frank', 'age', 22, 'gender', null)), named_struct('country', 'USA', 'city', 'Philadelphia', 'population', 1500000), array(named_struct('item', 'Watermelon', 'quantity', null, 'category', null), named_struct('item', 'Strawberry', 'quantity', null, 'category', null)), CAST(NULL AS STRUCT, new_a: STRUCT, c: INT>)), + (7, map('person7', named_struct('full_name', 'Grace', 'age', 27, 'gender', null)), named_struct('country', 'USA', 'city', 'San Antonio', 'population', 1500000), array(named_struct('item', 'Blueberry', 'quantity', null, 'category', null), named_struct('item', 'Raspberry', 'quantity', null, 'category', null)), CAST(NULL AS STRUCT, new_a: STRUCT, c: INT>)), + (8, map('person8', named_struct('full_name', 'Hank', 'age', 32, 'gender', null)), named_struct('country', 'USA', 'city', 'San Diego', 'population', 1400000), array(named_struct('item', 'Cherry', 'quantity', 5, 'category', null), named_struct('item', 'Plum', 'quantity', 3, 'category', null)), CAST(NULL AS STRUCT, new_a: STRUCT, c: INT>)), + (9, map('person9', named_struct('full_name', 'Ivy', 'age', 29, 'gender', null)), named_struct('country', 'USA', 'city', 'Dallas', 'population', 1300000), array(named_struct('item', 'Peach', 'quantity', 4, 'category', null), named_struct('item', 'Apricot', 'quantity', 2, 'category', null)), CAST(NULL AS STRUCT, new_a: 
STRUCT, c: INT>)), + (10, map('person10', named_struct('full_name', 'Jack', 'age', 26, 'gender', null)), named_struct('country', 'USA', 'city', 'Austin', 'population', 950000), array(named_struct('item', 'Fig', 'quantity', 6, 'category', null), named_struct('item', 'Date', 'quantity', 7, 'category', null)), CAST(NULL AS STRUCT, new_a: STRUCT, c: INT>)), + (11, map('person11', named_struct('full_name', 'Karen', 'age', 31, 'gender', 'Female')), named_struct('country', 'USA', 'city', 'Seattle', 'population', 750000), array(named_struct('item', 'Coconut', 'quantity', 1, 'category', null), named_struct('item', 'Papaya', 'quantity', 2, 'category', null)), CAST(NULL AS STRUCT, new_a: STRUCT, c: INT>)), + (12, map('person12', named_struct('full_name', 'Leo', 'age', 24, 'gender', 'Male')), named_struct('country', 'USA', 'city', 'Portland', 'population', 650000), array(named_struct('item', 'Guava', 'quantity', 3, 'category', null), named_struct('item', 'Lychee', 'quantity', 4, 'category', null)), CAST(NULL AS STRUCT, new_a: STRUCT, c: INT>)), + (13, map('person13', named_struct('full_name', 'Mona', 'age', 33, 'gender', 'Female')), named_struct('country', 'USA', 'city', 'Denver', 'population', 700000), array(named_struct('item', 'Avocado', 'quantity', 2, 'category', 'Fruit'), named_struct('item', 'Tomato', 'quantity', 5, 'category', 'Vegetable')), CAST(NULL AS STRUCT, new_a: STRUCT, c: INT>)), + (14, map('person14', named_struct('full_name', 'Nina', 'age', 28, 'gender', 'Female')), named_struct('country', 'USA', 'city', 'Miami', 'population', 450000), array(named_struct('item', 'Cucumber', 'quantity', 6, 'category', 'Vegetable'), named_struct('item', 'Carrot', 'quantity', 7, 'category', 'Vegetable')), CAST(NULL AS STRUCT, new_a: STRUCT, c: INT>)), + (15, map('person15', named_struct('full_name', 'Emma Smith', 'age', 30, 'gender', 'Female')), named_struct('country', 'USA', 'city', 'New York', 'population', 8000000), array(named_struct('item', 'Banana', 'quantity', 3, 
'category', 'Fruit'), named_struct('item', 'Potato', 'quantity', 8, 'category', 'Vegetable')), CAST(NULL AS STRUCT, new_a: STRUCT, c: INT>)); + diff --git a/docker/thirdparties/docker-compose/hudi/scripts/create_preinstalled_scripts/hudi/09_create_schema_evolution_tables.sql b/docker/thirdparties/docker-compose/hudi/scripts/create_preinstalled_scripts/hudi/09_create_schema_evolution_tables.sql new file mode 100644 index 00000000000000..27aca42b4af3e8 --- /dev/null +++ b/docker/thirdparties/docker-compose/hudi/scripts/create_preinstalled_scripts/hudi/09_create_schema_evolution_tables.sql @@ -0,0 +1,264 @@ +-- Create schema evolution test tables +-- These tables are used to test schema evolution operations supported by Hudi: +-- 1. Adding columns (ADD COLUMNS) +-- 2. Dropping columns (DROP COLUMNS) +-- 3. Renaming columns (RENAME COLUMN) +-- 4. Reordering columns (field order changes) +USE regression_hudi; + +-- Drop existing tables if they exist +DROP TABLE IF EXISTS adding_simple_columns_table; +DROP TABLE IF EXISTS deleting_simple_columns_table; +DROP TABLE IF EXISTS renaming_simple_columns_table; +DROP TABLE IF EXISTS reordering_columns_table; +DROP TABLE IF EXISTS adding_complex_columns_table; +DROP TABLE IF EXISTS deleting_complex_columns_table; +DROP TABLE IF EXISTS renaming_complex_columns_table; + +-- ============================================================================ +-- 1. 
adding_simple_columns_table - Testing adding simple type columns +-- ============================================================================ +-- Step 1: Create table with initial schema (id, name) +CREATE TABLE IF NOT EXISTS adding_simple_columns_table ( + id STRING, + name STRING +) USING hudi +OPTIONS ( + type = 'cow', + primaryKey = 'id', + hoodie.schema.on.read.enable = 'true', + hoodie.metadata.enable = 'false', + hoodie.datasource.hive_sync.enable = 'true', + hoodie.datasource.hive_sync.metastore.uris = '${HIVE_METASTORE_URIS}', + hoodie.datasource.hive_sync.mode = 'hms' +) +LOCATION 's3a://${HUDI_BUCKET}/warehouse/regression_hudi/adding_simple_columns_table'; + +-- Insert initial data with schema: id, name +INSERT INTO adding_simple_columns_table (id, name) VALUES + ('1', 'Alice'), + ('2', 'Bob'), + ('3', 'Cathy'); + +-- Step 2: Use ALTER TABLE to add age and city columns +ALTER TABLE adding_simple_columns_table ADD COLUMNS (age INT, city STRING); + +-- Step 3: Insert data with evolved schema (id, name, age, city) +INSERT INTO adding_simple_columns_table (id, name, age, city) VALUES + ('4', 'David', 25, 'New York'), + ('5', 'Eva', 30, 'Los Angeles'), + ('6', 'Frank', 28, 'Chicago'); + +-- ============================================================================ +-- 2. 
deleting_simple_columns_table - Testing dropping simple type columns +-- ============================================================================ +-- Step 1: Create table with initial schema (id, name, age, city) +CREATE TABLE IF NOT EXISTS deleting_simple_columns_table ( + id STRING, + name STRING, + age INT, + city STRING +) USING hudi +OPTIONS ( + type = 'cow', + primaryKey = 'id', + hoodie.schema.on.read.enable = 'true', + hoodie.metadata.enable = 'false', + hoodie.datasource.hive_sync.enable = 'true', + hoodie.datasource.hive_sync.metastore.uris = '${HIVE_METASTORE_URIS}', + hoodie.datasource.hive_sync.mode = 'hms' +) +LOCATION 's3a://${HUDI_BUCKET}/warehouse/regression_hudi/deleting_simple_columns_table'; + +-- Insert initial data with schema: id, name, age, city +INSERT INTO deleting_simple_columns_table (id, name, age, city) VALUES + ('1', 'Alice', 25, 'New York'), + ('2', 'Bob', 30, 'Los Angeles'), + ('3', 'Cathy', 28, 'Chicago'); + +-- Step 2: Insert data with evolved schema (id, name only) +-- Note: Hudi doesn't support ALTER TABLE DROP COLUMNS directly +-- We write data without age and city, and Hudi's schema evolution will handle it when reading +-- The table definition in Hive Metastore still contains all columns, but Hudi can read data correctly +INSERT INTO deleting_simple_columns_table (id, name) VALUES + ('4', 'David'), + ('5', 'Eva'), + ('6', 'Frank'); + +-- Step 3: Update Hive Metastore table definition +-- Note: Hudi doesn't support DROP COLUMNS via ALTER TABLE +-- The schema evolution is handled automatically when reading data +-- ALTER TABLE deleting_simple_columns_table DROP COLUMNS (age, city); + +-- ============================================================================ +-- 3. 
renaming_simple_columns_table - Testing renaming simple type columns +-- ============================================================================ +-- Note: Hudi doesn't support RENAME COLUMN operation +-- Since column renaming requires explicit metadata updates and Hudi's schema evolution +-- cannot automatically handle column name changes, we skip this test case +-- The table is created but not used for testing +CREATE TABLE IF NOT EXISTS renaming_simple_columns_table ( + id STRING, + name STRING +) USING hudi +OPTIONS ( + type = 'cow', + primaryKey = 'id', + hoodie.schema.on.read.enable = 'true', + hoodie.metadata.enable = 'false', + hoodie.datasource.hive_sync.enable = 'true', + hoodie.datasource.hive_sync.metastore.uris = '${HIVE_METASTORE_URIS}', + hoodie.datasource.hive_sync.mode = 'hms' +) +LOCATION 's3a://${HUDI_BUCKET}/warehouse/regression_hudi/renaming_simple_columns_table'; + +-- Insert initial data with schema: id, name +INSERT INTO renaming_simple_columns_table (id, name) VALUES + ('1', 'Alice'), + ('2', 'Bob'), + ('3', 'Cathy'); + +-- Note: Column renaming is not supported by Hudi's schema evolution +-- Hudi cannot automatically handle column name changes without explicit metadata updates +-- ALTER TABLE renaming_simple_columns_table RENAME COLUMN name TO full_name; + +-- ============================================================================ +-- 4. 
reordering_columns_table - Testing column reordering +-- ============================================================================ +-- Step 1: Create table with initial schema (id, name, age) +CREATE TABLE IF NOT EXISTS reordering_columns_table ( + id STRING, + name STRING, + age INT +) USING hudi +OPTIONS ( + type = 'cow', + primaryKey = 'id', + hoodie.schema.on.read.enable = 'true', + hoodie.metadata.enable = 'false', + hoodie.datasource.hive_sync.enable = 'true', + hoodie.datasource.hive_sync.metastore.uris = '${HIVE_METASTORE_URIS}', + hoodie.datasource.hive_sync.mode = 'hms' +) +LOCATION 's3a://${HUDI_BUCKET}/warehouse/regression_hudi/reordering_columns_table'; + +-- Insert initial data with schema: id, name, age +INSERT INTO reordering_columns_table (id, name, age) VALUES + ('1', 'Alice', 25), + ('2', 'Bob', 30), + ('3', 'Cathy', 28); + +-- Step 2: Add new column city after name (using ALTER TABLE with column position) +-- Note: Hudi may support column position specification +ALTER TABLE reordering_columns_table ADD COLUMNS (city STRING); + +-- Step 3: Insert data with evolved schema (id, name, city, age) +-- The actual column order in Hive Metastore may differ, but data should be readable +INSERT INTO reordering_columns_table (id, name, city, age) VALUES + ('4', 'David', 'New York', 26), + ('5', 'Eva', 'Los Angeles', 31), + ('6', 'Frank', 'Chicago', 29); + +-- ============================================================================ +-- 5. 
adding_complex_columns_table - Testing adding complex type columns +-- ============================================================================ +-- Note: Hudi doesn't support ALTER TABLE CHANGE COLUMN for modifying struct field definitions +-- Since we cannot add struct fields dynamically, we create the table with all fields from the start +-- This table is created but schema evolution testing for struct fields is not supported +CREATE TABLE IF NOT EXISTS adding_complex_columns_table ( + id STRING, + name STRING, + info STRUCT +) USING hudi +OPTIONS ( + type = 'cow', + primaryKey = 'id', + hoodie.schema.on.read.enable = 'true', + hoodie.metadata.enable = 'false', + hoodie.datasource.hive_sync.enable = 'true', + hoodie.datasource.hive_sync.metastore.uris = '${HIVE_METASTORE_URIS}', + hoodie.datasource.hive_sync.mode = 'hms' +) +LOCATION 's3a://${HUDI_BUCKET}/warehouse/regression_hudi/adding_complex_columns_table'; + +-- Insert initial data with schema: id, name, info STRUCT (email will be NULL) +INSERT INTO adding_complex_columns_table VALUES + ('1', 'Alice', named_struct('age', 25, 'address', 'Guangzhou', 'email', CAST(NULL AS STRING))), + ('2', 'Bob', named_struct('age', 30, 'address', 'Shanghai', 'email', CAST(NULL AS STRING))), + ('3', 'Cathy', named_struct('age', 28, 'address', 'Beijing', 'email', CAST(NULL AS STRING))); + +-- Insert data with evolved schema (id, name, info STRUCT) +INSERT INTO adding_complex_columns_table VALUES + ('4', 'David', named_struct('age', 25, 'address', 'Shenzhen', 'email', 'david@example.com')), + ('5', 'Eva', named_struct('age', 30, 'address', 'Chengdu', 'email', 'eva@example.com')), + ('6', 'Frank', named_struct('age', 28, 'address', 'Wuhan', 'email', 'frank@example.com')); + +-- ============================================================================ +-- 6. 
deleting_complex_columns_table - Testing dropping complex type columns +-- ============================================================================ +-- Step 1: Create table with initial schema (id, name, info STRUCT) +CREATE TABLE IF NOT EXISTS deleting_complex_columns_table ( + id STRING, + name STRING, + info STRUCT +) USING hudi +OPTIONS ( + type = 'cow', + primaryKey = 'id', + hoodie.schema.on.read.enable = 'true', + hoodie.metadata.enable = 'false', + hoodie.datasource.hive_sync.enable = 'true', + hoodie.datasource.hive_sync.metastore.uris = '${HIVE_METASTORE_URIS}', + hoodie.datasource.hive_sync.mode = 'hms' +) +LOCATION 's3a://${HUDI_BUCKET}/warehouse/regression_hudi/deleting_complex_columns_table'; + +-- Insert initial data with schema: id, name, info STRUCT +INSERT INTO deleting_complex_columns_table VALUES + ('1', 'Alice', named_struct('age', 25, 'address', 'Guangzhou', 'email', 'alice@example.com')), + ('2', 'Bob', named_struct('age', 30, 'address', 'Shanghai', 'email', 'bob@example.com')), + ('3', 'Cathy', named_struct('age', 28, 'address', 'Beijing', 'email', 'cathy@example.com')); + +-- Step 2: Insert data with evolved schema (id, name, info STRUCT) +-- Note: Hudi doesn't support removing struct fields dynamically +-- We write data with email set to NULL to simulate field removal +INSERT INTO deleting_complex_columns_table VALUES + ('4', 'David', named_struct('age', 25, 'address', 'Shenzhen', 'email', CAST(NULL AS STRING))), + ('5', 'Eva', named_struct('age', 30, 'address', 'Chengdu', 'email', CAST(NULL AS STRING))), + ('6', 'Frank', named_struct('age', 28, 'address', 'Wuhan', 'email', CAST(NULL AS STRING))); + +-- Note: Hudi doesn't support ALTER TABLE CHANGE COLUMN for modifying struct field definitions +-- The table definition remains with all fields, but we can test reading data with NULL values + +-- ============================================================================ +-- 7. 
renaming_complex_columns_table - Testing renaming complex type columns +-- ============================================================================ +-- Note: Hudi doesn't support renaming struct fields +-- Since field renaming in structs requires explicit metadata updates and Hudi's schema evolution +-- cannot automatically handle field name changes, we skip this test case +-- The table is created but not used for testing +CREATE TABLE IF NOT EXISTS renaming_complex_columns_table ( + id STRING, + name STRING, + info STRUCT +) USING hudi +OPTIONS ( + type = 'cow', + primaryKey = 'id', + hoodie.schema.on.read.enable = 'true', + hoodie.metadata.enable = 'false', + hoodie.datasource.hive_sync.enable = 'true', + hoodie.datasource.hive_sync.metastore.uris = '${HIVE_METASTORE_URIS}', + hoodie.datasource.hive_sync.mode = 'hms' +) +LOCATION 's3a://${HUDI_BUCKET}/warehouse/regression_hudi/renaming_complex_columns_table'; + +-- Insert initial data with schema: id, name, info STRUCT +INSERT INTO renaming_complex_columns_table VALUES + ('1', 'Alice', named_struct('age', 25, 'location', 'Guangzhou')), + ('2', 'Bob', named_struct('age', 30, 'location', 'Shanghai')), + ('3', 'Cathy', named_struct('age', 28, 'location', 'Beijing')); + +-- Note: Struct field renaming is not supported by Hudi's schema evolution +-- Hudi cannot automatically handle field name changes in structs without explicit metadata updates +-- ALTER TABLE renaming_complex_columns_table CHANGE COLUMN info info STRUCT; diff --git a/docker/thirdparties/docker-compose/hudi/scripts/create_preinstalled_scripts/hudi/10_create_timestamp_table.sql b/docker/thirdparties/docker-compose/hudi/scripts/create_preinstalled_scripts/hudi/10_create_timestamp_table.sql new file mode 100644 index 00000000000000..f37e8610a6d2dd --- /dev/null +++ b/docker/thirdparties/docker-compose/hudi/scripts/create_preinstalled_scripts/hudi/10_create_timestamp_table.sql @@ -0,0 +1,39 @@ +-- Create timestamp test table +-- This table is used to 
test timestamp handling with different timezones +USE regression_hudi; + +-- Drop existing table if it exists +DROP TABLE IF EXISTS hudi_table_with_timestamp; + +-- Set time zone for timestamp insertion +-- Timestamps will be inserted in America/Los_Angeles timezone +SET TIME ZONE 'America/Los_Angeles'; + +-- Create hudi_table_with_timestamp table +CREATE TABLE IF NOT EXISTS hudi_table_with_timestamp ( + id STRING, + name STRING, + event_time TIMESTAMP +) USING hudi +OPTIONS ( + type = 'cow', + primaryKey = 'id', + preCombineField = 'event_time', + hoodie.metadata.enable = 'false', + hoodie.datasource.hive_sync.enable = 'true', + hoodie.datasource.hive_sync.metastore.uris = '${HIVE_METASTORE_URIS}', + hoodie.datasource.hive_sync.mode = 'hms' +) +LOCATION 's3a://${HUDI_BUCKET}/warehouse/regression_hudi/hudi_table_with_timestamp'; + +-- Insert data with timestamps +-- Note: Timestamps are inserted in America/Los_Angeles timezone +-- The test will query them in different timezones to verify timezone handling +INSERT INTO hudi_table_with_timestamp VALUES + ('1', 'Alice', TIMESTAMP '2024-10-25 08:00:00'), + ('2', 'Bob', TIMESTAMP '2024-10-25 09:30:00'), + ('3', 'Charlie', TIMESTAMP '2024-10-25 11:00:00'); + +-- Reset time zone to default (UTC) to avoid affecting other scripts +SET TIME ZONE 'UTC'; + diff --git a/docker/thirdparties/docker-compose/hudi/scripts/create_preinstalled_scripts/hudi/11_create_mtmv_tables.sql b/docker/thirdparties/docker-compose/hudi/scripts/create_preinstalled_scripts/hudi/11_create_mtmv_tables.sql new file mode 100644 index 00000000000000..19bc6bf8e375ae --- /dev/null +++ b/docker/thirdparties/docker-compose/hudi/scripts/create_preinstalled_scripts/hudi/11_create_mtmv_tables.sql @@ -0,0 +1,132 @@ +-- Create MTMV test tables for Hudi materialized view tests +-- These tables are used by test_hudi_mtmv, test_hudi_rewrite_mtmv, and test_hudi_olap_rewrite_mtmv +USE regression_hudi; + +-- Create database for MTMV tests if it doesn't exist +CREATE 
DATABASE IF NOT EXISTS hudi_mtmv_regression_test; +USE hudi_mtmv_regression_test; + +-- Drop existing tables if they exist +DROP TABLE IF EXISTS hudi_table_1; +DROP TABLE IF EXISTS hudi_table_two_partitions; +DROP TABLE IF EXISTS hudi_table_null_partition; + +-- ============================================================================ +-- hudi_table_1: Table with par partition (values: 'a', 'b') +-- Used for basic MTMV tests, partition refresh, and rewrite tests +-- ============================================================================ +CREATE TABLE IF NOT EXISTS hudi_table_1 ( + id INT, + age INT, + par STRING +) USING hudi +OPTIONS ( + type = 'cow', + primaryKey = 'id', + hoodie.schema.on.read.enable = 'true', + hoodie.metadata.enable = 'false', + hoodie.parquet.small.file.limit = '100', + hoodie.datasource.hive_sync.enable = 'true', + hoodie.datasource.hive_sync.metastore.uris = '${HIVE_METASTORE_URIS}', + hoodie.datasource.hive_sync.mode = 'hms' +) +PARTITIONED BY (par) +LOCATION 's3a://${HUDI_BUCKET}/warehouse/hudi_mtmv_regression_test/hudi_table_1'; + +-- Insert data for partition 'a' +INSERT INTO hudi_table_1 PARTITION (par='a') VALUES + (1, 25), + (2, 30), + (3, 28), + (4, 35), + (5, 22); + +-- Insert data for partition 'b' +INSERT INTO hudi_table_1 PARTITION (par='b') VALUES + (6, 40), + (7, 27), + (8, 32), + (9, 29), + (10, 38); + +-- ============================================================================ +-- hudi_table_two_partitions: Table with create_date partition (DATE type) +-- Used for date partition tests and partition sync limit tests +-- Partition values: 2020-01-01, 2038-01-01, 2038-01-02 +-- ============================================================================ +CREATE TABLE IF NOT EXISTS hudi_table_two_partitions ( + id INT, + name STRING, + value INT, + create_date DATE +) USING hudi +OPTIONS ( + type = 'cow', + primaryKey = 'id', + hoodie.schema.on.read.enable = 'true', + hoodie.metadata.enable = 'false', + 
hoodie.parquet.small.file.limit = '100', + hoodie.datasource.hive_sync.enable = 'true', + hoodie.datasource.hive_sync.metastore.uris = '${HIVE_METASTORE_URIS}', + hoodie.datasource.hive_sync.mode = 'hms' +) +PARTITIONED BY (create_date) +LOCATION 's3a://${HUDI_BUCKET}/warehouse/hudi_mtmv_regression_test/hudi_table_two_partitions'; + +-- Insert data for partition 2020-01-01 +INSERT INTO hudi_table_two_partitions PARTITION (create_date='2020-01-01') VALUES + (1, 'name1', 100), + (2, 'name2', 200), + (3, 'name3', 300); + +-- Insert data for partition 2038-01-01 +INSERT INTO hudi_table_two_partitions PARTITION (create_date='2038-01-01') VALUES + (4, 'name4', 400), + (5, 'name5', 500), + (6, 'name6', 600), + (7, 'name7', 700); + +-- Insert data for partition 2038-01-02 +INSERT INTO hudi_table_two_partitions PARTITION (create_date='2038-01-02') VALUES + (8, 'name8', 800), + (9, 'name9', 900), + (10, 'name10', 1000); + +-- ============================================================================ +-- hudi_table_null_partition: Table with region partition (contains NULL values) +-- Used for null partition handling tests +-- Partition values: NULL, 'bj' +-- ============================================================================ +CREATE TABLE IF NOT EXISTS hudi_table_null_partition ( + id INT, + name STRING, + value INT, + region STRING +) USING hudi +OPTIONS ( + type = 'cow', + primaryKey = 'id', + hoodie.schema.on.read.enable = 'true', + hoodie.metadata.enable = 'false', + hoodie.parquet.small.file.limit = '100', + hoodie.datasource.hive_sync.enable = 'true', + hoodie.datasource.hive_sync.metastore.uris = '${HIVE_METASTORE_URIS}', + hoodie.datasource.hive_sync.mode = 'hms' +) +PARTITIONED BY (region) +LOCATION 's3a://${HUDI_BUCKET}/warehouse/hudi_mtmv_regression_test/hudi_table_null_partition'; + +-- Insert data for partition NULL (using __HIVE_DEFAULT_PARTITION__) +-- Note: Hudi/Hive uses '__HIVE_DEFAULT_PARTITION__' for NULL partition values +-- When queried, this 
will appear as 'NULL' or '__HIVE_DEFAULT_PARTITION__' +INSERT INTO hudi_table_null_partition PARTITION (region='__HIVE_DEFAULT_PARTITION__') VALUES + (1, 'name1', 100), + (2, 'name2', 200), + (3, 'name3', 300); + +-- Insert data for partition 'bj' +INSERT INTO hudi_table_null_partition PARTITION (region='bj') VALUES + (4, 'name4', 400), + (5, 'name5', 500), + (6, 'name6', 600); + diff --git a/docker/thirdparties/docker-compose/hudi/scripts/init.sh b/docker/thirdparties/docker-compose/hudi/scripts/init.sh new file mode 100755 index 00000000000000..88d028762ef33d --- /dev/null +++ b/docker/thirdparties/docker-compose/hudi/scripts/init.sh @@ -0,0 +1,236 @@ +#!/bin/bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +set -euo pipefail + +# Remove SUCCESS file from previous run to ensure fresh initialization +SUCCESS_FILE="/opt/hudi-scripts/SUCCESS" +if [[ -f "${SUCCESS_FILE}" ]]; then + echo "Removing previous SUCCESS file to ensure fresh initialization..." 
+ rm -f "${SUCCESS_FILE}" +fi + +SPARK_HOME=/opt/spark +CONF_DIR="${SPARK_HOME}/conf" +JARS_DIR="${SPARK_HOME}/jars" +CACHE_DIR=/opt/hudi-cache + +mkdir -p "${CONF_DIR}" "${CACHE_DIR}" + +# Function to download a JAR file if it doesn't exist +download_jar() { + local jar_name="$1" + local version="$2" + local url="$3" + local jar_file="${CACHE_DIR}/${jar_name}-${version}.jar" + + if [[ ! -f "${jar_file}" ]]; then + echo "Downloading ${jar_name} JAR ${version} from ${url} ..." >&2 + local download_success=false + if command -v curl >/dev/null 2>&1; then + if curl -sSfL "${url}" -o "${jar_file}"; then + download_success=true + else + echo "Error: Failed to download ${jar_name} from ${url}" >&2 + fi + elif command -v wget >/dev/null 2>&1; then + if wget -qO "${jar_file}" "${url}"; then + download_success=true + else + echo "Error: Failed to download ${jar_name} from ${url}" >&2 + fi + else + echo "Error: Neither curl nor wget is available in hudi-spark container." >&2 + exit 1 + fi + + if [[ "${download_success}" == "false" ]]; then + echo "Error: Failed to download ${jar_name} JAR. Please check the URL: ${url}" >&2 + exit 1 + fi + + if [[ ! -f "${jar_file}" ]]; then + echo "Error: Downloaded file ${jar_file} does not exist" >&2 + exit 1 + fi + fi + echo "${jar_file}" +} + +# Function to link a JAR file to Spark jars directory +link_jar() { + local jar_file="$1" + local jar_name="$2" + local version="$3" + ln -sf "${jar_file}" "${JARS_DIR}/${jar_name}-${version}.jar" +} + +# Wait for Hive Metastore to be ready +echo "Waiting for Hive Metastore to be ready..." 
+METASTORE_HOST=$(echo "${HIVE_METASTORE_URIS}" | sed 's|thrift://||' | cut -d: -f1) +METASTORE_PORT=$(echo "${HIVE_METASTORE_URIS}" | sed 's|thrift://||' | cut -d: -f2) +MAX_RETRIES=120 +RETRY_COUNT=0 + +while [ $RETRY_COUNT -lt $MAX_RETRIES ]; do + if command -v nc >/dev/null 2>&1; then + if nc -z "${METASTORE_HOST}" "${METASTORE_PORT}" 2>/dev/null; then + echo "Hive Metastore is ready at ${METASTORE_HOST}:${METASTORE_PORT}" + break + fi + elif command -v timeout >/dev/null 2>&1; then + if timeout 1 bash -c "cat < /dev/null > /dev/tcp/${METASTORE_HOST}/${METASTORE_PORT}" 2>/dev/null; then + echo "Hive Metastore is ready at ${METASTORE_HOST}:${METASTORE_PORT}" + break + fi + else + # Fallback: just wait a bit and assume it's ready + if [ $RETRY_COUNT -eq 0 ]; then + echo "Warning: nc or timeout command not available, skipping metastore readiness check" + sleep 10 + break + fi + fi + + RETRY_COUNT=$((RETRY_COUNT + 1)) + if [ $((RETRY_COUNT % 10)) -eq 0 ]; then + echo "Waiting for Hive Metastore... 
(${RETRY_COUNT}/${MAX_RETRIES})" + fi + sleep 2 +done + +if [ $RETRY_COUNT -ge $MAX_RETRIES ]; then + echo "Error: Hive Metastore did not become ready within $((MAX_RETRIES * 2)) seconds" + exit 1 +fi + +# Write core-site for MinIO (S3A) +cat >"${CONF_DIR}/core-site.xml" < + + fs.s3a.endpoint + ${S3_ENDPOINT} + + + fs.s3a.access.key + ${MINIO_ROOT_USER} + + + fs.s3a.secret.key + ${MINIO_ROOT_PASSWORD} + + + fs.s3a.path.style.access + true + + + fs.s3a.impl + org.apache.hadoop.fs.s3a.S3AFileSystem + + +EOF + +# hive-site to point Spark to the external metastore +cat >"${CONF_DIR}/hive-site.xml" < + + hive.metastore.uris + ${HIVE_METASTORE_URIS} + + + javax.jdo.option.ConnectionDriverName + org.postgresql.Driver + + + datanucleus.schema.autoCreateAll + true + + + hive.metastore.warehouse.dir + s3a://${HUDI_BUCKET}/warehouse + + +EOF + +# Download Hudi bundle +HUDI_BUNDLE_JAR_FILE=$(download_jar "hudi-spark3.5-bundle_2.12" "${HUDI_BUNDLE_VERSION}" "${HUDI_BUNDLE_URL}") +link_jar "${HUDI_BUNDLE_JAR_FILE}" "hudi-spark3.5-bundle_2.12" "${HUDI_BUNDLE_VERSION}" + +# Download Hadoop AWS S3A filesystem JAR (required for S3A support) +# Note: hadoop-common is already included in Spark's built-in Hadoop, no need to download separately +HADOOP_AWS_JAR=$(download_jar "hadoop-aws" "${HADOOP_AWS_VERSION}" "${HADOOP_AWS_URL}") +link_jar "${HADOOP_AWS_JAR}" "hadoop-aws" "${HADOOP_AWS_VERSION}" + +# Download AWS Java SDK Bundle v1 (required for Hadoop 3.3.6 S3A support) +# Note: Hadoop 3.3.x uses AWS SDK v1, version 1.12.262 is recommended +AWS_SDK_BUNDLE_JAR=$(download_jar "aws-java-sdk-bundle" "${AWS_SDK_BUNDLE_VERSION}" "${AWS_SDK_BUNDLE_URL}") +link_jar "${AWS_SDK_BUNDLE_JAR}" "aws-java-sdk-bundle" "${AWS_SDK_BUNDLE_VERSION}" + +# Download PostgreSQL JDBC driver (required for Hive Metastore connection) +POSTGRESQL_JDBC_JAR=$(download_jar "postgresql" "${POSTGRESQL_JDBC_VERSION}" "${POSTGRESQL_JDBC_URL}") +link_jar "${POSTGRESQL_JDBC_JAR}" "postgresql" "${POSTGRESQL_JDBC_VERSION}" 
+ +# Process SQL files with environment variable substitution and execute them +# Similar to iceberg's approach: group SQL files together to reduce client creation overhead +SCRIPTS_DIR="/opt/hudi-scripts/create_preinstalled_scripts/hudi" +TEMP_SQL_DIR="/tmp/hudi_sql" + +if [[ -d "${SCRIPTS_DIR}" ]]; then + mkdir -p "${TEMP_SQL_DIR}" && : > "${TEMP_SQL_DIR}/hudi_total.sql" + + # Process each SQL file: substitute environment variables and combine them + echo "Processing Hudi SQL scripts..." + for sql_file in $(find "${SCRIPTS_DIR}" -name '*.sql' | sort); do + echo "Processing ${sql_file}..." + # Use sed to replace environment variables in SQL files + # Replace ${HIVE_METASTORE_URIS} and ${HUDI_BUCKET} with actual values + sed "s|\${HIVE_METASTORE_URIS}|${HIVE_METASTORE_URIS}|g; s|\${HUDI_BUCKET}|${HUDI_BUCKET}|g" "${sql_file}" >> "${TEMP_SQL_DIR}/hudi_total.sql" + echo "" >> "${TEMP_SQL_DIR}/hudi_total.sql" + done + + # Run Spark SQL to execute all SQL scripts + echo "Executing Hudi SQL scripts..." + START_TIME=$(date +%s) + ${SPARK_HOME}/bin/spark-sql \ + --master local[*] \ + --name hudi-init \ + --conf spark.serializer=org.apache.spark.serializer.KryoSerializer \ + --conf spark.sql.catalogImplementation=hive \ + --conf spark.sql.extensions=org.apache.spark.sql.hudi.HoodieSparkSessionExtension \ + --conf spark.sql.catalog.spark_catalog=org.apache.spark.sql.hudi.catalog.HoodieCatalog \ + -f "${TEMP_SQL_DIR}/hudi_total.sql" + END_TIME=$(date +%s) + EXECUTION_TIME=$((END_TIME - START_TIME)) + echo "Hudi SQL scripts executed in ${EXECUTION_TIME} seconds" + + # Clean up temporary SQL file + rm -f "${TEMP_SQL_DIR}/hudi_total.sql" +else + echo "Warning: SQL scripts directory ${SCRIPTS_DIR} not found, skipping table initialization." +fi + +# Create success marker file to indicate initialization is complete +# This file is used by docker healthcheck to verify container is ready +touch "${SUCCESS_FILE}" + +echo "Hudi demo data initialized." +echo "Initialization completed successfully." 
+ +# Keep container running for healthcheck and potential future use +# Similar to iceberg's approach: tail -f /dev/null +tail -f /dev/null diff --git a/docker/thirdparties/docker-compose/hudi/scripts/run_sync_tool.sh b/docker/thirdparties/docker-compose/hudi/scripts/run_sync_tool.sh deleted file mode 100755 index 390d09f9670f2d..00000000000000 --- a/docker/thirdparties/docker-compose/hudi/scripts/run_sync_tool.sh +++ /dev/null @@ -1,56 +0,0 @@ -#!/usr/bin/env bash - -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -function error_exit { - echo "$1" >&2 ## Send message to stderr. Exclude >&2 if you don't want it that way. - exit "${2:-1}" ## Return a code specified by $2 or 1 by default. 
-} - -if [ -z "${HADOOP_HOME}" ]; then - error_exit "Please make sure the environment variable HADOOP_HOME is setup" -fi - -if [ -z "${HIVE_HOME}" ]; then - error_exit "Please make sure the environment variable HIVE_HOME is setup" -fi - -DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" -#Ensure we pick the right jar even for hive11 builds -HUDI_HIVE_UBER_JAR=`ls -c $DIR/./hudi_docker_compose_attached_file/jar/hoodie-hive-sync-bundle.jar | grep -v source | head -1` - -if [ -z "$HADOOP_CONF_DIR" ]; then - echo "setting hadoop conf dir" - HADOOP_CONF_DIR="${HADOOP_HOME}/etc/hadoop" -fi - -## Include only specific packages from HIVE_HOME/lib to avoid version mismatches -HIVE_EXEC=`ls ${HIVE_HOME}/lib/hive-exec-*.jar | tr '\n' ':'` -HIVE_SERVICE=`ls ${HIVE_HOME}/lib/hive-service-*.jar | grep -v rpc | tr '\n' ':'` -HIVE_METASTORE=`ls ${HIVE_HOME}/lib/hive-metastore-*.jar | tr '\n' ':'` -HIVE_JDBC=`ls ${HIVE_HOME}/lib/hive-jdbc-*.jar | tr '\n' ':'` -if [ -z "${HIVE_JDBC}" ]; then - HIVE_JDBC=`ls ${HIVE_HOME}/lib/hive-jdbc-*.jar | grep -v handler | tr '\n' ':'` -fi -HIVE_JACKSON=`ls ${HIVE_HOME}/lib/jackson-*.jar | tr '\n' ':'` -HIVE_JARS=$HIVE_METASTORE:$HIVE_SERVICE:$HIVE_EXEC:$HIVE_JDBC:$HIVE_JACKSON - -HADOOP_HIVE_JARS=${HIVE_JARS}:${HADOOP_HOME}/share/hadoop/common/*:${HADOOP_HOME}/share/hadoop/mapreduce/*:${HADOOP_HOME}/share/hadoop/hdfs/*:${HADOOP_HOME}/share/hadoop/common/lib/*:${HADOOP_HOME}/share/hadoop/hdfs/lib/* - -echo "Running Command : java -cp ${HADOOP_HIVE_JARS}:${HADOOP_CONF_DIR}:$HUDI_HIVE_UBER_JAR org.apache.hudi.hive.HiveSyncTool $@" -java -cp $HUDI_HIVE_UBER_JAR:${HADOOP_HIVE_JARS}:${HADOOP_CONF_DIR} org.apache.hudi.hive.HiveSyncTool "$@" diff --git a/docker/thirdparties/docker-compose/hudi/scripts/setup_demo_container_adhoc_1.sh b/docker/thirdparties/docker-compose/hudi/scripts/setup_demo_container_adhoc_1.sh deleted file mode 100755 index a5edb7676a3545..00000000000000 --- 
a/docker/thirdparties/docker-compose/hudi/scripts/setup_demo_container_adhoc_1.sh +++ /dev/null @@ -1,31 +0,0 @@ -#!/bin/bash -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -echo "Copying spark default config and setting up configs" -cp /var/scripts/config/spark-defaults.conf $SPARK_CONF_DIR/. -cp /var/scripts/config/log4j2.properties $SPARK_CONF_DIR/. -echo "sleep 10, wait hdfs start" -sleep 10 -echo "hadoop fs -mkdir -p /var/demo/" -hadoop fs -mkdir -p /var/demo/ -echo "hadoop fs -mkdir -p /tmp/spark-events" -hadoop fs -mkdir -p /tmp/spark-events -echo "hadoop fs -copyFromLocal -f /var/scripts/config /var/demo/." -hadoop fs -copyFromLocal -f /var/scripts/config /var/demo/. -echo "chmod +x /var/scripts/run_sync_tool.sh" -chmod +x /var/scripts/run_sync_tool.sh diff --git a/docker/thirdparties/docker-compose/hudi/scripts/setup_demo_container_adhoc_2.sh b/docker/thirdparties/docker-compose/hudi/scripts/setup_demo_container_adhoc_2.sh deleted file mode 100755 index a55dddd86dfa7b..00000000000000 --- a/docker/thirdparties/docker-compose/hudi/scripts/setup_demo_container_adhoc_2.sh +++ /dev/null @@ -1,77 +0,0 @@ -#!/bin/bash -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. 
See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -echo "Copying spark default config and setting up configs" -cp /var/scripts/config/spark-defaults.conf $SPARK_CONF_DIR/. -cp /var/scripts/config/log4j2.properties $SPARK_CONF_DIR/. -echo "sleep 10, wait hdfs start" -sleep 10 -echo "hadoop fs -mkdir -p /var/demo/" -hadoop fs -mkdir -p /var/demo/ -echo "hadoop fs -mkdir -p /tmp/spark-events" -hadoop fs -mkdir -p /tmp/spark-events -echo "hadoop fs -mkdir -p /user/hive/" -hadoop fs -mkdir -p /user/hive/ -echo "hadoop fs -copyFromLocal -f /var/scripts/config /var/demo/." -hadoop fs -copyFromLocal -f /var/scripts/config /var/demo/. 
-echo "hadoop fs -copyFromLocal -f /var/scripts/hudi_docker_compose_attached_file/warehouse /user/hive/" -hadoop fs -copyFromLocal -f /var/scripts/hudi_docker_compose_attached_file/warehouse /user/hive/ -echo "chmod +x /var/scripts/run_sync_tool.sh" -chmod +x /var/scripts/run_sync_tool.sh - -echo "Start synchronizing the stock_ticks_cow table" -/var/scripts/run_sync_tool.sh \ - --jdbc-url jdbc:hive2://hiveserver:10000 \ - --user hive \ - --pass hive \ - --partitioned-by date \ - --base-path /user/hive/warehouse/stock_ticks_cow \ - --database default \ - --table stock_ticks_cow \ - --partition-value-extractor org.apache.hudi.hive.SlashEncodedDayPartitionValueExtractor - -echo "Start synchronizing the stock_ticks_mor table" -/var/scripts/run_sync_tool.sh \ - --jdbc-url jdbc:hive2://hiveserver:10000 \ - --user hive \ - --pass hive \ - --partitioned-by date \ - --base-path /user/hive/warehouse/stock_ticks_mor \ - --database default \ - --table stock_ticks_mor \ - --partition-value-extractor org.apache.hudi.hive.SlashEncodedDayPartitionValueExtractor - -echo "Start synchronizing the hudi_cow_pt_tbl table" -/var/scripts/run_sync_tool.sh \ - --jdbc-url jdbc:hive2://hiveserver:10000 \ - --user hive \ - --pass hive \ - --partitioned-by dt \ - --base-path /user/hive/warehouse/hudi_cow_pt_tbl \ - --database default \ - --table hudi_cow_pt_tbl \ - --partition-value-extractor org.apache.hudi.hive.HiveStylePartitionValueExtractor - -echo "Start synchronizing the hudi_non_part_cow table" -/var/scripts/run_sync_tool.sh \ - --jdbc-url jdbc:hive2://hiveserver:10000 \ - --user hive \ - --pass hive \ - --base-path /user/hive/warehouse/hudi_non_part_cow \ - --database default \ - --table hudi_non_part_cow \ diff --git a/docker/thirdparties/run-thirdparties-docker.sh b/docker/thirdparties/run-thirdparties-docker.sh index b6000d57e16265..313ca60ad3d07b 100755 --- a/docker/thirdparties/run-thirdparties-docker.sh +++ b/docker/thirdparties/run-thirdparties-docker.sh @@ -44,8 +44,8 @@ Usage: 
$0 " exit 1 } -DEFAULT_COMPONENTS="mysql,es,hive2,hive3,pg,oracle,sqlserver,clickhouse,mariadb,iceberg,db2,oceanbase,kerberos,minio" -ALL_COMPONENTS="${DEFAULT_COMPONENTS},hudi,trino,kafka,spark,lakesoul,ranger,polaris" +DEFAULT_COMPONENTS="mysql,es,hive2,hive3,pg,oracle,sqlserver,clickhouse,mariadb,iceberg,hudi,db2,oceanbase,kerberos,minio" +ALL_COMPONENTS="${DEFAULT_COMPONENTS},trino,kafka,spark,lakesoul,ranger,polaris" COMPONENTS=$2 HELP=0 STOP=0 @@ -197,6 +197,7 @@ for element in "${COMPONENTS_ARR[@]}"; do RUN_ICEBERG_REST=1 elif [[ "${element}"x == "hudi"x ]]; then RUN_HUDI=1 + RESERVED_PORTS="${RESERVED_PORTS},19083,19100,19101,18080" elif [[ "${element}"x == "trino"x ]]; then RUN_TRINO=1 elif [[ "${element}"x == "spark"x ]]; then @@ -462,24 +463,17 @@ start_iceberg() { } start_hudi() { - # hudi - cp "${ROOT}"/docker-compose/hudi/hudi.yaml.tpl "${ROOT}"/docker-compose/hudi/hudi.yaml - sed -i "s/doris--/${CONTAINER_UID}/g" "${ROOT}"/docker-compose/hudi/hudi.yaml - sudo docker compose -f "${ROOT}"/docker-compose/hudi/hudi.yaml --env-file "${ROOT}"/docker-compose/hudi/hadoop.env down + HUDI_DIR=${ROOT}/docker-compose/hudi + export CONTAINER_UID=${CONTAINER_UID} + envsubst <"${HUDI_DIR}"/hudi.env.tpl >"${HUDI_DIR}"/hudi.env + set -a + . "${HUDI_DIR}"/hudi.env + set +a + envsubst <"${HUDI_DIR}"/hudi.yaml.tpl >"${HUDI_DIR}"/hudi.yaml + sudo chmod +x "${HUDI_DIR}"/scripts/init.sh + sudo docker compose -f "${HUDI_DIR}"/hudi.yaml --env-file "${HUDI_DIR}"/hudi.env down --remove-orphans if [[ "${STOP}" -ne 1 ]]; then - sudo rm -rf "${ROOT}"/docker-compose/hudi/historyserver - sudo mkdir "${ROOT}"/docker-compose/hudi/historyserver - sudo rm -rf "${ROOT}"/docker-compose/hudi/hive-metastore-postgresql - sudo mkdir "${ROOT}"/docker-compose/hudi/hive-metastore-postgresql - if [[ ! 
-d "${ROOT}/docker-compose/hudi/scripts/hudi_docker_compose_attached_file" ]]; then - echo "Attached files does not exist, please download the https://doris-regression-hk.oss-cn-hongkong.aliyuncs.com/regression/load/hudi/hudi_docker_compose_attached_file.zip file to the docker-compose/hudi/scripts/ directory and unzip it." - exit 1 - fi - sudo docker compose -f "${ROOT}"/docker-compose/hudi/hudi.yaml --env-file "${ROOT}"/docker-compose/hudi/hadoop.env up -d - echo "sleep 15, wait server start" - sleep 15 - docker exec -it adhoc-1 /bin/bash /var/scripts/setup_demo_container_adhoc_1.sh - docker exec -it adhoc-2 /bin/bash /var/scripts/setup_demo_container_adhoc_2.sh + sudo docker compose -f "${HUDI_DIR}"/hudi.yaml --env-file "${HUDI_DIR}"/hudi.env up -d --wait fi } diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiScanNode.java index 6663cfd003f814..362407223355e6 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiScanNode.java @@ -73,6 +73,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; +import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Optional; @@ -234,8 +235,14 @@ protected Map getLocationProperties() { if (incrementalRead) { return incrementalRelation.getHoodieParams(); } else { - // HudiJniScanner uses hadoop client to read data. 
- return hmsTable.getBackendStorageProperties(); + // Merge both BE format (AWS_*) and Hadoop format (fs.s3a.*) properties + // Native reader needs BE format, JNI reader needs Hadoop format + Map properties = new HashMap<>(); + // Add BE format properties for native reader + properties.putAll(hmsTable.getBackendStorageProperties()); + // Add Hadoop format properties for JNI reader + properties.putAll(hmsTable.getCatalog().getCatalogProperty().getHadoopProperties()); + return properties; } } diff --git a/regression-test/conf/regression-conf.groovy b/regression-test/conf/regression-conf.groovy index 910fcef82b002d..79b4e5c20bc93d 100644 --- a/regression-test/conf/regression-conf.groovy +++ b/regression-test/conf/regression-conf.groovy @@ -320,4 +320,14 @@ enableTestTvfAnonymous="true" anymousS3Uri="https://datasets-documentation.s3.eu-west-3.amazonaws.com/aapl_stock.csv" anymousS3Region="eu-west-3" anymousS3ExpectDataCount="8365" -awsInstanceProfileRegion="us-east-1" \ No newline at end of file +awsInstanceProfileRegion="us-east-1" + +// hudi p0 external regression test config +// To enable hudi test, you need first start hudi container. +// See `docker/thirdparties/run-thirdparties-docker.sh -c hudi` +enableHudiTest=true +// hudi catalog config +hudiHmsPort=19083 +hudiMinioPort=19100 +hudiMinioAccessKey="minio" +hudiMinioSecretKey="minio123" diff --git a/regression-test/data/external_table_p0/hudi/test_hudi_catalog.out b/regression-test/data/external_table_p0/hudi/test_hudi_catalog.out new file mode 100644 index 00000000000000..2c785e2edbb63b --- /dev/null +++ b/regression-test/data/external_table_p0/hudi/test_hudi_catalog.out @@ -0,0 +1,13 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !test_select_table -- +1 Alice 1234567890 +2 Bob 1234567890 +3 Charlie 9876543210 +4 David 9876543210 + +-- !test_select_table -- +1 Alice 1234567890 +2 Bob 1234567890 +3 Charlie 9876543210 +4 David 9876543210 + diff --git a/regression-test/data/external_table_p0/hudi/test_hudi_full_schema_change.out b/regression-test/data/external_table_p0/hudi/test_hudi_full_schema_change.out new file mode 100644 index 00000000000000..5667db8dee4043 --- /dev/null +++ b/regression-test/data/external_table_p0/hudi/test_hudi_full_schema_change.out @@ -0,0 +1,1065 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !all -- +0 \N \N \N \N +0 {"person0":{"full_name":"zero", "age":2, "gender":null}} {"country":null, "city":"cn", "population":1000000} [{"item":"Apple", "quantity":null, "category":null}, {"item":"Banana", "quantity":null, "category":null}] \N +0 {"person0":{"full_name":"zero", "age":2, "gender":null}} {"country":null, "city":"cn", "population":1000000} [{"item":"Apple", "quantity":null, "category":null}, {"item":"Banana", "quantity":null, "category":null}] \N +1 \N \N \N \N +1 {"person1":{"full_name":"Alice", "age":25, "gender":null}} {"country":null, "city":"New York", "population":8000000} [{"item":"Apple", "quantity":null, "category":null}, {"item":"Banana", "quantity":null, "category":null}] \N +1 {"person1":{"full_name":"Alice", "age":25, "gender":null}} {"country":null, "city":"New York", "population":8000000} [{"item":"Apple", "quantity":null, "category":null}, {"item":"Banana", "quantity":null, "category":null}] \N +10 \N \N \N \N +10 {"person10":{"full_name":"Jack", "age":26, "gender":null}} {"country":"USA", "city":"Austin", "population":950000} [{"item":"Fig", "quantity":6, "category":null}, {"item":"Date", "quantity":7, "category":null}] \N +10 {"person10":{"full_name":"Jack", "age":26, "gender":null}} {"country":"USA", "city":"Austin", 
"population":950000} [{"item":"Fig", "quantity":6, "category":null}, {"item":"Date", "quantity":7, "category":null}] \N +11 \N \N \N \N +11 {"person11":{"full_name":"Karen", "age":31, "gender":"Female"}} {"country":"USA", "city":"Seattle", "population":750000} [{"item":"Coconut", "quantity":1, "category":null}, {"item":"Papaya", "quantity":2, "category":null}] \N +11 {"person11":{"full_name":"Karen", "age":31, "gender":"Female"}} {"country":"USA", "city":"Seattle", "population":750000} [{"item":"Coconut", "quantity":1, "category":null}, {"item":"Papaya", "quantity":2, "category":null}] \N +12 \N \N \N \N +12 {"person12":{"full_name":"Leo", "age":24, "gender":"Male"}} {"country":"USA", "city":"Portland", "population":650000} [{"item":"Guava", "quantity":3, "category":null}, {"item":"Lychee", "quantity":4, "category":null}] \N +12 {"person12":{"full_name":"Leo", "age":24, "gender":"Male"}} {"country":"USA", "city":"Portland", "population":650000} [{"item":"Guava", "quantity":3, "category":null}, {"item":"Lychee", "quantity":4, "category":null}] \N +13 \N \N \N \N +13 {"person13":{"full_name":"Mona", "age":33, "gender":"Female"}} {"country":"USA", "city":"Denver", "population":700000} [{"item":"Avocado", "quantity":2, "category":"Fruit"}, {"item":"Tomato", "quantity":5, "category":"Vegetable"}] \N +13 {"person13":{"full_name":"Mona", "age":33, "gender":"Female"}} {"country":"USA", "city":"Denver", "population":700000} [{"item":"Avocado", "quantity":2, "category":"Fruit"}, {"item":"Tomato", "quantity":5, "category":"Vegetable"}] \N +14 \N \N \N \N +14 {"person14":{"full_name":"Nina", "age":28, "gender":"Female"}} {"country":"USA", "city":"Miami", "population":450000} [{"item":"Cucumber", "quantity":6, "category":"Vegetable"}, {"item":"Carrot", "quantity":7, "category":"Vegetable"}] \N +14 {"person14":{"full_name":"Nina", "age":28, "gender":"Female"}} {"country":"USA", "city":"Miami", "population":450000} [{"item":"Cucumber", "quantity":6, "category":"Vegetable"}, 
{"item":"Carrot", "quantity":7, "category":"Vegetable"}] \N +15 \N \N \N \N +15 {"person15":{"full_name":"Emma Smith", "age":30, "gender":"Female"}} {"country":"USA", "city":"New York", "population":8000000} [{"item":"Banana", "quantity":3, "category":"Fruit"}, {"item":"Potato", "quantity":8, "category":"Vegetable"}] \N +15 {"person15":{"full_name":"Emma Smith", "age":30, "gender":"Female"}} {"country":"USA", "city":"New York", "population":8000000} [{"item":"Banana", "quantity":3, "category":"Fruit"}, {"item":"Potato", "quantity":8, "category":"Vegetable"}] \N +16 \N \N \N \N +17 \N \N \N \N +18 \N \N \N \N +19 \N \N \N \N +2 \N \N \N \N +2 {"person2":{"full_name":"Bob", "age":30, "gender":null}} {"country":null, "city":"Los Angeles", "population":4000000} [{"item":"Orange", "quantity":null, "category":null}, {"item":"Grape", "quantity":null, "category":null}] \N +2 {"person2":{"full_name":"Bob", "age":30, "gender":null}} {"country":null, "city":"Los Angeles", "population":4000000} [{"item":"Orange", "quantity":null, "category":null}, {"item":"Grape", "quantity":null, "category":null}] \N +20 \N \N \N \N +21 \N \N \N \N +3 \N \N \N \N +3 {"person3":{"full_name":"Charlie", "age":28, "gender":null}} {"country":null, "city":"Chicago", "population":2700000} [{"item":"Pear", "quantity":null, "category":null}, {"item":"Mango", "quantity":null, "category":null}] \N +3 {"person3":{"full_name":"Charlie", "age":28, "gender":null}} {"country":null, "city":"Chicago", "population":2700000} [{"item":"Pear", "quantity":null, "category":null}, {"item":"Mango", "quantity":null, "category":null}] \N +4 \N \N \N \N +4 {"person4":{"full_name":"David", "age":35, "gender":null}} {"country":null, "city":"Houston", "population":2300000} [{"item":"Kiwi", "quantity":null, "category":null}, {"item":"Pineapple", "quantity":null, "category":null}] \N +4 {"person4":{"full_name":"David", "age":35, "gender":null}} {"country":null, "city":"Houston", "population":2300000} [{"item":"Kiwi", 
"quantity":null, "category":null}, {"item":"Pineapple", "quantity":null, "category":null}] \N +5 \N \N \N \N +5 {"person5":{"full_name":"Eve", "age":40, "gender":null}} {"country":"USA", "city":"Phoenix", "population":1600000} [{"item":"Lemon", "quantity":null, "category":null}, {"item":"Lime", "quantity":null, "category":null}] \N +5 {"person5":{"full_name":"Eve", "age":40, "gender":null}} {"country":"USA", "city":"Phoenix", "population":1600000} [{"item":"Lemon", "quantity":null, "category":null}, {"item":"Lime", "quantity":null, "category":null}] \N +6 \N \N \N \N +6 {"person6":{"full_name":"Frank", "age":22, "gender":null}} {"country":"USA", "city":"Philadelphia", "population":1500000} [{"item":"Watermelon", "quantity":null, "category":null}, {"item":"Strawberry", "quantity":null, "category":null}] \N +6 {"person6":{"full_name":"Frank", "age":22, "gender":null}} {"country":"USA", "city":"Philadelphia", "population":1500000} [{"item":"Watermelon", "quantity":null, "category":null}, {"item":"Strawberry", "quantity":null, "category":null}] \N +7 \N \N \N \N +7 {"person7":{"full_name":"Grace", "age":27, "gender":null}} {"country":"USA", "city":"San Antonio", "population":1500000} [{"item":"Blueberry", "quantity":null, "category":null}, {"item":"Raspberry", "quantity":null, "category":null}] \N +7 {"person7":{"full_name":"Grace", "age":27, "gender":null}} {"country":"USA", "city":"San Antonio", "population":1500000} [{"item":"Blueberry", "quantity":null, "category":null}, {"item":"Raspberry", "quantity":null, "category":null}] \N +8 \N \N \N \N +8 {"person8":{"full_name":"Hank", "age":32, "gender":null}} {"country":"USA", "city":"San Diego", "population":1400000} [{"item":"Cherry", "quantity":5, "category":null}, {"item":"Plum", "quantity":3, "category":null}] \N +8 {"person8":{"full_name":"Hank", "age":32, "gender":null}} {"country":"USA", "city":"San Diego", "population":1400000} [{"item":"Cherry", "quantity":5, "category":null}, {"item":"Plum", "quantity":3, 
"category":null}] \N +9 \N \N \N \N +9 {"person9":{"full_name":"Ivy", "age":29, "gender":null}} {"country":"USA", "city":"Dallas", "population":1300000} [{"item":"Peach", "quantity":4, "category":null}, {"item":"Apricot", "quantity":2, "category":null}] \N +9 {"person9":{"full_name":"Ivy", "age":29, "gender":null}} {"country":"USA", "city":"Dallas", "population":1300000} [{"item":"Peach", "quantity":4, "category":null}, {"item":"Apricot", "quantity":2, "category":null}] \N + +-- !country_usa -- +10 {"person10":{"full_name":"Jack", "age":26, "gender":null}} {"country":"USA", "city":"Austin", "population":950000} [{"item":"Fig", "quantity":6, "category":null}, {"item":"Date", "quantity":7, "category":null}] \N +10 {"person10":{"full_name":"Jack", "age":26, "gender":null}} {"country":"USA", "city":"Austin", "population":950000} [{"item":"Fig", "quantity":6, "category":null}, {"item":"Date", "quantity":7, "category":null}] \N +11 {"person11":{"full_name":"Karen", "age":31, "gender":"Female"}} {"country":"USA", "city":"Seattle", "population":750000} [{"item":"Coconut", "quantity":1, "category":null}, {"item":"Papaya", "quantity":2, "category":null}] \N +11 {"person11":{"full_name":"Karen", "age":31, "gender":"Female"}} {"country":"USA", "city":"Seattle", "population":750000} [{"item":"Coconut", "quantity":1, "category":null}, {"item":"Papaya", "quantity":2, "category":null}] \N +12 {"person12":{"full_name":"Leo", "age":24, "gender":"Male"}} {"country":"USA", "city":"Portland", "population":650000} [{"item":"Guava", "quantity":3, "category":null}, {"item":"Lychee", "quantity":4, "category":null}] \N +12 {"person12":{"full_name":"Leo", "age":24, "gender":"Male"}} {"country":"USA", "city":"Portland", "population":650000} [{"item":"Guava", "quantity":3, "category":null}, {"item":"Lychee", "quantity":4, "category":null}] \N +13 {"person13":{"full_name":"Mona", "age":33, "gender":"Female"}} {"country":"USA", "city":"Denver", "population":700000} [{"item":"Avocado", 
"quantity":2, "category":"Fruit"}, {"item":"Tomato", "quantity":5, "category":"Vegetable"}] \N +13 {"person13":{"full_name":"Mona", "age":33, "gender":"Female"}} {"country":"USA", "city":"Denver", "population":700000} [{"item":"Avocado", "quantity":2, "category":"Fruit"}, {"item":"Tomato", "quantity":5, "category":"Vegetable"}] \N +14 {"person14":{"full_name":"Nina", "age":28, "gender":"Female"}} {"country":"USA", "city":"Miami", "population":450000} [{"item":"Cucumber", "quantity":6, "category":"Vegetable"}, {"item":"Carrot", "quantity":7, "category":"Vegetable"}] \N +14 {"person14":{"full_name":"Nina", "age":28, "gender":"Female"}} {"country":"USA", "city":"Miami", "population":450000} [{"item":"Cucumber", "quantity":6, "category":"Vegetable"}, {"item":"Carrot", "quantity":7, "category":"Vegetable"}] \N +15 {"person15":{"full_name":"Emma Smith", "age":30, "gender":"Female"}} {"country":"USA", "city":"New York", "population":8000000} [{"item":"Banana", "quantity":3, "category":"Fruit"}, {"item":"Potato", "quantity":8, "category":"Vegetable"}] \N +15 {"person15":{"full_name":"Emma Smith", "age":30, "gender":"Female"}} {"country":"USA", "city":"New York", "population":8000000} [{"item":"Banana", "quantity":3, "category":"Fruit"}, {"item":"Potato", "quantity":8, "category":"Vegetable"}] \N +5 {"person5":{"full_name":"Eve", "age":40, "gender":null}} {"country":"USA", "city":"Phoenix", "population":1600000} [{"item":"Lemon", "quantity":null, "category":null}, {"item":"Lime", "quantity":null, "category":null}] \N +5 {"person5":{"full_name":"Eve", "age":40, "gender":null}} {"country":"USA", "city":"Phoenix", "population":1600000} [{"item":"Lemon", "quantity":null, "category":null}, {"item":"Lime", "quantity":null, "category":null}] \N +6 {"person6":{"full_name":"Frank", "age":22, "gender":null}} {"country":"USA", "city":"Philadelphia", "population":1500000} [{"item":"Watermelon", "quantity":null, "category":null}, {"item":"Strawberry", "quantity":null, "category":null}] 
\N +6 {"person6":{"full_name":"Frank", "age":22, "gender":null}} {"country":"USA", "city":"Philadelphia", "population":1500000} [{"item":"Watermelon", "quantity":null, "category":null}, {"item":"Strawberry", "quantity":null, "category":null}] \N +7 {"person7":{"full_name":"Grace", "age":27, "gender":null}} {"country":"USA", "city":"San Antonio", "population":1500000} [{"item":"Blueberry", "quantity":null, "category":null}, {"item":"Raspberry", "quantity":null, "category":null}] \N +7 {"person7":{"full_name":"Grace", "age":27, "gender":null}} {"country":"USA", "city":"San Antonio", "population":1500000} [{"item":"Blueberry", "quantity":null, "category":null}, {"item":"Raspberry", "quantity":null, "category":null}] \N +8 {"person8":{"full_name":"Hank", "age":32, "gender":null}} {"country":"USA", "city":"San Diego", "population":1400000} [{"item":"Cherry", "quantity":5, "category":null}, {"item":"Plum", "quantity":3, "category":null}] \N +8 {"person8":{"full_name":"Hank", "age":32, "gender":null}} {"country":"USA", "city":"San Diego", "population":1400000} [{"item":"Cherry", "quantity":5, "category":null}, {"item":"Plum", "quantity":3, "category":null}] \N +9 {"person9":{"full_name":"Ivy", "age":29, "gender":null}} {"country":"USA", "city":"Dallas", "population":1300000} [{"item":"Peach", "quantity":4, "category":null}, {"item":"Apricot", "quantity":2, "category":null}] \N +9 {"person9":{"full_name":"Ivy", "age":29, "gender":null}} {"country":"USA", "city":"Dallas", "population":1300000} [{"item":"Peach", "quantity":4, "category":null}, {"item":"Apricot", "quantity":2, "category":null}] \N + +-- !country_usa_cols -- +10 Austin Jack 2 +10 Austin Jack 2 +11 Seattle Karen 2 +11 Seattle Karen 2 +12 Portland Leo 2 +12 Portland Leo 2 +13 Denver Mona 2 +13 Denver Mona 2 +14 Miami Nina 2 +14 Miami Nina 2 +15 New York Emma Smith 2 +15 New York Emma Smith 2 +5 Phoenix Eve 2 +5 Phoenix Eve 2 +6 Philadelphia Frank 2 +6 Philadelphia Frank 2 +7 San Antonio Grace 2 +7 San Antonio 
Grace 2 +8 San Diego Hank 2 +8 San Diego Hank 2 +9 Dallas Ivy 2 +9 Dallas Ivy 2 + +-- !city_new -- +1 {"person1":{"full_name":"Alice", "age":25, "gender":null}} {"country":null, "city":"New York", "population":8000000} [{"item":"Apple", "quantity":null, "category":null}, {"item":"Banana", "quantity":null, "category":null}] \N +1 {"person1":{"full_name":"Alice", "age":25, "gender":null}} {"country":null, "city":"New York", "population":8000000} [{"item":"Apple", "quantity":null, "category":null}, {"item":"Banana", "quantity":null, "category":null}] \N +15 {"person15":{"full_name":"Emma Smith", "age":30, "gender":"Female"}} {"country":"USA", "city":"New York", "population":8000000} [{"item":"Banana", "quantity":3, "category":"Fruit"}, {"item":"Potato", "quantity":8, "category":"Vegetable"}] \N +15 {"person15":{"full_name":"Emma Smith", "age":30, "gender":"Female"}} {"country":"USA", "city":"New York", "population":8000000} [{"item":"Banana", "quantity":3, "category":"Fruit"}, {"item":"Potato", "quantity":8, "category":"Vegetable"}] \N + +-- !city_new_cols -- +1 \N 25 Apple +1 \N 25 Apple +15 USA 30 Banana +15 USA 30 Banana + +-- !age_over_30 -- +11 {"person11":{"full_name":"Karen", "age":31, "gender":"Female"}} {"country":"USA", "city":"Seattle", "population":750000} [{"item":"Coconut", "quantity":1, "category":null}, {"item":"Papaya", "quantity":2, "category":null}] \N +11 {"person11":{"full_name":"Karen", "age":31, "gender":"Female"}} {"country":"USA", "city":"Seattle", "population":750000} [{"item":"Coconut", "quantity":1, "category":null}, {"item":"Papaya", "quantity":2, "category":null}] \N +13 {"person13":{"full_name":"Mona", "age":33, "gender":"Female"}} {"country":"USA", "city":"Denver", "population":700000} [{"item":"Avocado", "quantity":2, "category":"Fruit"}, {"item":"Tomato", "quantity":5, "category":"Vegetable"}] \N +13 {"person13":{"full_name":"Mona", "age":33, "gender":"Female"}} {"country":"USA", "city":"Denver", "population":700000} 
[{"item":"Avocado", "quantity":2, "category":"Fruit"}, {"item":"Tomato", "quantity":5, "category":"Vegetable"}] \N +4 {"person4":{"full_name":"David", "age":35, "gender":null}} {"country":null, "city":"Houston", "population":2300000} [{"item":"Kiwi", "quantity":null, "category":null}, {"item":"Pineapple", "quantity":null, "category":null}] \N +4 {"person4":{"full_name":"David", "age":35, "gender":null}} {"country":null, "city":"Houston", "population":2300000} [{"item":"Kiwi", "quantity":null, "category":null}, {"item":"Pineapple", "quantity":null, "category":null}] \N +5 {"person5":{"full_name":"Eve", "age":40, "gender":null}} {"country":"USA", "city":"Phoenix", "population":1600000} [{"item":"Lemon", "quantity":null, "category":null}, {"item":"Lime", "quantity":null, "category":null}] \N +5 {"person5":{"full_name":"Eve", "age":40, "gender":null}} {"country":"USA", "city":"Phoenix", "population":1600000} [{"item":"Lemon", "quantity":null, "category":null}, {"item":"Lime", "quantity":null, "category":null}] \N +8 {"person8":{"full_name":"Hank", "age":32, "gender":null}} {"country":"USA", "city":"San Diego", "population":1400000} [{"item":"Cherry", "quantity":5, "category":null}, {"item":"Plum", "quantity":3, "category":null}] \N +8 {"person8":{"full_name":"Hank", "age":32, "gender":null}} {"country":"USA", "city":"San Diego", "population":1400000} [{"item":"Cherry", "quantity":5, "category":null}, {"item":"Plum", "quantity":3, "category":null}] \N + +-- !age_over_30_cols -- +11 Seattle \N +11 Seattle \N +13 Denver Vegetable +13 Denver Vegetable +4 Houston \N +4 Houston \N +5 Phoenix \N +5 Phoenix \N +8 San Diego \N +8 San Diego \N + +-- !age_under_25 -- +0 {"person0":{"full_name":"zero", "age":2, "gender":null}} {"country":null, "city":"cn", "population":1000000} [{"item":"Apple", "quantity":null, "category":null}, {"item":"Banana", "quantity":null, "category":null}] \N +0 {"person0":{"full_name":"zero", "age":2, "gender":null}} {"country":null, "city":"cn", 
"population":1000000} [{"item":"Apple", "quantity":null, "category":null}, {"item":"Banana", "quantity":null, "category":null}] \N +12 {"person12":{"full_name":"Leo", "age":24, "gender":"Male"}} {"country":"USA", "city":"Portland", "population":650000} [{"item":"Guava", "quantity":3, "category":null}, {"item":"Lychee", "quantity":4, "category":null}] \N +12 {"person12":{"full_name":"Leo", "age":24, "gender":"Male"}} {"country":"USA", "city":"Portland", "population":650000} [{"item":"Guava", "quantity":3, "category":null}, {"item":"Lychee", "quantity":4, "category":null}] \N +6 {"person6":{"full_name":"Frank", "age":22, "gender":null}} {"country":"USA", "city":"Philadelphia", "population":1500000} [{"item":"Watermelon", "quantity":null, "category":null}, {"item":"Strawberry", "quantity":null, "category":null}] \N +6 {"person6":{"full_name":"Frank", "age":22, "gender":null}} {"country":"USA", "city":"Philadelphia", "population":1500000} [{"item":"Watermelon", "quantity":null, "category":null}, {"item":"Strawberry", "quantity":null, "category":null}] \N + +-- !age_under_25_cols -- +0 \N person0 +0 \N person0 +12 USA person12 +12 USA person12 +6 USA person6 +6 USA person6 + +-- !name_alice -- +1 {"person1":{"full_name":"Alice", "age":25, "gender":null}} {"country":null, "city":"New York", "population":8000000} [{"item":"Apple", "quantity":null, "category":null}, {"item":"Banana", "quantity":null, "category":null}] \N +1 {"person1":{"full_name":"Alice", "age":25, "gender":null}} {"country":null, "city":"New York", "population":8000000} [{"item":"Apple", "quantity":null, "category":null}, {"item":"Banana", "quantity":null, "category":null}] \N + +-- !name_alice_cols -- +1 New York 2 +1 New York 2 + +-- !name_j -- +10 {"person10":{"full_name":"Jack", "age":26, "gender":null}} {"country":"USA", "city":"Austin", "population":950000} [{"item":"Fig", "quantity":6, "category":null}, {"item":"Date", "quantity":7, "category":null}] \N +10 {"person10":{"full_name":"Jack", 
"age":26, "gender":null}} {"country":"USA", "city":"Austin", "population":950000} [{"item":"Fig", "quantity":6, "category":null}, {"item":"Date", "quantity":7, "category":null}] \N + +-- !name_j_cols -- +10 USA \N +10 USA \N + +-- !map_person5 -- +5 {"person5":{"full_name":"Eve", "age":40, "gender":null}} {"country":"USA", "city":"Phoenix", "population":1600000} [{"item":"Lemon", "quantity":null, "category":null}, {"item":"Lime", "quantity":null, "category":null}] \N +5 {"person5":{"full_name":"Eve", "age":40, "gender":null}} {"country":"USA", "city":"Phoenix", "population":1600000} [{"item":"Lemon", "quantity":null, "category":null}, {"item":"Lime", "quantity":null, "category":null}] \N + +-- !map_person5_cols -- +5 Phoenix 40 +5 Phoenix 40 + +-- !array_size_2 -- +0 {"person0":{"full_name":"zero", "age":2, "gender":null}} {"country":null, "city":"cn", "population":1000000} [{"item":"Apple", "quantity":null, "category":null}, {"item":"Banana", "quantity":null, "category":null}] \N +0 {"person0":{"full_name":"zero", "age":2, "gender":null}} {"country":null, "city":"cn", "population":1000000} [{"item":"Apple", "quantity":null, "category":null}, {"item":"Banana", "quantity":null, "category":null}] \N +1 {"person1":{"full_name":"Alice", "age":25, "gender":null}} {"country":null, "city":"New York", "population":8000000} [{"item":"Apple", "quantity":null, "category":null}, {"item":"Banana", "quantity":null, "category":null}] \N +1 {"person1":{"full_name":"Alice", "age":25, "gender":null}} {"country":null, "city":"New York", "population":8000000} [{"item":"Apple", "quantity":null, "category":null}, {"item":"Banana", "quantity":null, "category":null}] \N +10 {"person10":{"full_name":"Jack", "age":26, "gender":null}} {"country":"USA", "city":"Austin", "population":950000} [{"item":"Fig", "quantity":6, "category":null}, {"item":"Date", "quantity":7, "category":null}] \N +10 {"person10":{"full_name":"Jack", "age":26, "gender":null}} {"country":"USA", "city":"Austin", 
"population":950000} [{"item":"Fig", "quantity":6, "category":null}, {"item":"Date", "quantity":7, "category":null}] \N +11 {"person11":{"full_name":"Karen", "age":31, "gender":"Female"}} {"country":"USA", "city":"Seattle", "population":750000} [{"item":"Coconut", "quantity":1, "category":null}, {"item":"Papaya", "quantity":2, "category":null}] \N +11 {"person11":{"full_name":"Karen", "age":31, "gender":"Female"}} {"country":"USA", "city":"Seattle", "population":750000} [{"item":"Coconut", "quantity":1, "category":null}, {"item":"Papaya", "quantity":2, "category":null}] \N +12 {"person12":{"full_name":"Leo", "age":24, "gender":"Male"}} {"country":"USA", "city":"Portland", "population":650000} [{"item":"Guava", "quantity":3, "category":null}, {"item":"Lychee", "quantity":4, "category":null}] \N +12 {"person12":{"full_name":"Leo", "age":24, "gender":"Male"}} {"country":"USA", "city":"Portland", "population":650000} [{"item":"Guava", "quantity":3, "category":null}, {"item":"Lychee", "quantity":4, "category":null}] \N +13 {"person13":{"full_name":"Mona", "age":33, "gender":"Female"}} {"country":"USA", "city":"Denver", "population":700000} [{"item":"Avocado", "quantity":2, "category":"Fruit"}, {"item":"Tomato", "quantity":5, "category":"Vegetable"}] \N +13 {"person13":{"full_name":"Mona", "age":33, "gender":"Female"}} {"country":"USA", "city":"Denver", "population":700000} [{"item":"Avocado", "quantity":2, "category":"Fruit"}, {"item":"Tomato", "quantity":5, "category":"Vegetable"}] \N +14 {"person14":{"full_name":"Nina", "age":28, "gender":"Female"}} {"country":"USA", "city":"Miami", "population":450000} [{"item":"Cucumber", "quantity":6, "category":"Vegetable"}, {"item":"Carrot", "quantity":7, "category":"Vegetable"}] \N +14 {"person14":{"full_name":"Nina", "age":28, "gender":"Female"}} {"country":"USA", "city":"Miami", "population":450000} [{"item":"Cucumber", "quantity":6, "category":"Vegetable"}, {"item":"Carrot", "quantity":7, "category":"Vegetable"}] \N +15 
{"person15":{"full_name":"Emma Smith", "age":30, "gender":"Female"}} {"country":"USA", "city":"New York", "population":8000000} [{"item":"Banana", "quantity":3, "category":"Fruit"}, {"item":"Potato", "quantity":8, "category":"Vegetable"}] \N +15 {"person15":{"full_name":"Emma Smith", "age":30, "gender":"Female"}} {"country":"USA", "city":"New York", "population":8000000} [{"item":"Banana", "quantity":3, "category":"Fruit"}, {"item":"Potato", "quantity":8, "category":"Vegetable"}] \N +2 {"person2":{"full_name":"Bob", "age":30, "gender":null}} {"country":null, "city":"Los Angeles", "population":4000000} [{"item":"Orange", "quantity":null, "category":null}, {"item":"Grape", "quantity":null, "category":null}] \N +2 {"person2":{"full_name":"Bob", "age":30, "gender":null}} {"country":null, "city":"Los Angeles", "population":4000000} [{"item":"Orange", "quantity":null, "category":null}, {"item":"Grape", "quantity":null, "category":null}] \N +3 {"person3":{"full_name":"Charlie", "age":28, "gender":null}} {"country":null, "city":"Chicago", "population":2700000} [{"item":"Pear", "quantity":null, "category":null}, {"item":"Mango", "quantity":null, "category":null}] \N +3 {"person3":{"full_name":"Charlie", "age":28, "gender":null}} {"country":null, "city":"Chicago", "population":2700000} [{"item":"Pear", "quantity":null, "category":null}, {"item":"Mango", "quantity":null, "category":null}] \N +4 {"person4":{"full_name":"David", "age":35, "gender":null}} {"country":null, "city":"Houston", "population":2300000} [{"item":"Kiwi", "quantity":null, "category":null}, {"item":"Pineapple", "quantity":null, "category":null}] \N +4 {"person4":{"full_name":"David", "age":35, "gender":null}} {"country":null, "city":"Houston", "population":2300000} [{"item":"Kiwi", "quantity":null, "category":null}, {"item":"Pineapple", "quantity":null, "category":null}] \N +5 {"person5":{"full_name":"Eve", "age":40, "gender":null}} {"country":"USA", "city":"Phoenix", "population":1600000} [{"item":"Lemon", 
"quantity":null, "category":null}, {"item":"Lime", "quantity":null, "category":null}] \N +5 {"person5":{"full_name":"Eve", "age":40, "gender":null}} {"country":"USA", "city":"Phoenix", "population":1600000} [{"item":"Lemon", "quantity":null, "category":null}, {"item":"Lime", "quantity":null, "category":null}] \N +6 {"person6":{"full_name":"Frank", "age":22, "gender":null}} {"country":"USA", "city":"Philadelphia", "population":1500000} [{"item":"Watermelon", "quantity":null, "category":null}, {"item":"Strawberry", "quantity":null, "category":null}] \N +6 {"person6":{"full_name":"Frank", "age":22, "gender":null}} {"country":"USA", "city":"Philadelphia", "population":1500000} [{"item":"Watermelon", "quantity":null, "category":null}, {"item":"Strawberry", "quantity":null, "category":null}] \N +7 {"person7":{"full_name":"Grace", "age":27, "gender":null}} {"country":"USA", "city":"San Antonio", "population":1500000} [{"item":"Blueberry", "quantity":null, "category":null}, {"item":"Raspberry", "quantity":null, "category":null}] \N +7 {"person7":{"full_name":"Grace", "age":27, "gender":null}} {"country":"USA", "city":"San Antonio", "population":1500000} [{"item":"Blueberry", "quantity":null, "category":null}, {"item":"Raspberry", "quantity":null, "category":null}] \N +8 {"person8":{"full_name":"Hank", "age":32, "gender":null}} {"country":"USA", "city":"San Diego", "population":1400000} [{"item":"Cherry", "quantity":5, "category":null}, {"item":"Plum", "quantity":3, "category":null}] \N +8 {"person8":{"full_name":"Hank", "age":32, "gender":null}} {"country":"USA", "city":"San Diego", "population":1400000} [{"item":"Cherry", "quantity":5, "category":null}, {"item":"Plum", "quantity":3, "category":null}] \N +9 {"person9":{"full_name":"Ivy", "age":29, "gender":null}} {"country":"USA", "city":"Dallas", "population":1300000} [{"item":"Peach", "quantity":4, "category":null}, {"item":"Apricot", "quantity":2, "category":null}] \N +9 {"person9":{"full_name":"Ivy", "age":29, 
"gender":null}} {"country":"USA", "city":"Dallas", "population":1300000} [{"item":"Peach", "quantity":4, "category":null}, {"item":"Apricot", "quantity":2, "category":null}] \N + +-- !array_size_2_cols -- +0 \N \N +0 \N \N +1 \N \N +1 \N \N +10 USA \N +10 USA \N +11 USA \N +11 USA \N +12 USA \N +12 USA \N +13 USA \N +13 USA \N +14 USA \N +14 USA \N +15 USA \N +15 USA \N +2 \N \N +2 \N \N +3 \N \N +3 \N \N +4 \N \N +4 \N \N +5 USA \N +5 USA \N +6 USA \N +6 USA \N +7 USA \N +7 USA \N +8 USA \N +8 USA \N +9 USA \N +9 USA \N + +-- !quantity_not_null -- +10 {"person10":{"full_name":"Jack", "age":26, "gender":null}} {"country":"USA", "city":"Austin", "population":950000} [{"item":"Fig", "quantity":6, "category":null}, {"item":"Date", "quantity":7, "category":null}] \N +10 {"person10":{"full_name":"Jack", "age":26, "gender":null}} {"country":"USA", "city":"Austin", "population":950000} [{"item":"Fig", "quantity":6, "category":null}, {"item":"Date", "quantity":7, "category":null}] \N +11 {"person11":{"full_name":"Karen", "age":31, "gender":"Female"}} {"country":"USA", "city":"Seattle", "population":750000} [{"item":"Coconut", "quantity":1, "category":null}, {"item":"Papaya", "quantity":2, "category":null}] \N +11 {"person11":{"full_name":"Karen", "age":31, "gender":"Female"}} {"country":"USA", "city":"Seattle", "population":750000} [{"item":"Coconut", "quantity":1, "category":null}, {"item":"Papaya", "quantity":2, "category":null}] \N +12 {"person12":{"full_name":"Leo", "age":24, "gender":"Male"}} {"country":"USA", "city":"Portland", "population":650000} [{"item":"Guava", "quantity":3, "category":null}, {"item":"Lychee", "quantity":4, "category":null}] \N +12 {"person12":{"full_name":"Leo", "age":24, "gender":"Male"}} {"country":"USA", "city":"Portland", "population":650000} [{"item":"Guava", "quantity":3, "category":null}, {"item":"Lychee", "quantity":4, "category":null}] \N +13 {"person13":{"full_name":"Mona", "age":33, "gender":"Female"}} {"country":"USA", 
"city":"Denver", "population":700000} [{"item":"Avocado", "quantity":2, "category":"Fruit"}, {"item":"Tomato", "quantity":5, "category":"Vegetable"}] \N +13 {"person13":{"full_name":"Mona", "age":33, "gender":"Female"}} {"country":"USA", "city":"Denver", "population":700000} [{"item":"Avocado", "quantity":2, "category":"Fruit"}, {"item":"Tomato", "quantity":5, "category":"Vegetable"}] \N +14 {"person14":{"full_name":"Nina", "age":28, "gender":"Female"}} {"country":"USA", "city":"Miami", "population":450000} [{"item":"Cucumber", "quantity":6, "category":"Vegetable"}, {"item":"Carrot", "quantity":7, "category":"Vegetable"}] \N +14 {"person14":{"full_name":"Nina", "age":28, "gender":"Female"}} {"country":"USA", "city":"Miami", "population":450000} [{"item":"Cucumber", "quantity":6, "category":"Vegetable"}, {"item":"Carrot", "quantity":7, "category":"Vegetable"}] \N +15 {"person15":{"full_name":"Emma Smith", "age":30, "gender":"Female"}} {"country":"USA", "city":"New York", "population":8000000} [{"item":"Banana", "quantity":3, "category":"Fruit"}, {"item":"Potato", "quantity":8, "category":"Vegetable"}] \N +15 {"person15":{"full_name":"Emma Smith", "age":30, "gender":"Female"}} {"country":"USA", "city":"New York", "population":8000000} [{"item":"Banana", "quantity":3, "category":"Fruit"}, {"item":"Potato", "quantity":8, "category":"Vegetable"}] \N +8 {"person8":{"full_name":"Hank", "age":32, "gender":null}} {"country":"USA", "city":"San Diego", "population":1400000} [{"item":"Cherry", "quantity":5, "category":null}, {"item":"Plum", "quantity":3, "category":null}] \N +8 {"person8":{"full_name":"Hank", "age":32, "gender":null}} {"country":"USA", "city":"San Diego", "population":1400000} [{"item":"Cherry", "quantity":5, "category":null}, {"item":"Plum", "quantity":3, "category":null}] \N +9 {"person9":{"full_name":"Ivy", "age":29, "gender":null}} {"country":"USA", "city":"Dallas", "population":1300000} [{"item":"Peach", "quantity":4, "category":null}, {"item":"Apricot", 
"quantity":2, "category":null}] \N +9 {"person9":{"full_name":"Ivy", "age":29, "gender":null}} {"country":"USA", "city":"Dallas", "population":1300000} [{"item":"Peach", "quantity":4, "category":null}, {"item":"Apricot", "quantity":2, "category":null}] \N + +-- !quantity_not_null_cols -- +10 Austin Jack +10 Austin Jack +11 Seattle Karen +11 Seattle Karen +12 Portland Leo +12 Portland Leo +13 Denver Mona +13 Denver Mona +14 Miami Nina +14 Miami Nina +15 New York Emma Smith +15 New York Emma Smith +8 San Diego Hank +8 San Diego Hank +9 Dallas Ivy +9 Dallas Ivy + +-- !quantity_null -- +0 \N \N \N \N +0 {"person0":{"full_name":"zero", "age":2, "gender":null}} {"country":null, "city":"cn", "population":1000000} [{"item":"Apple", "quantity":null, "category":null}, {"item":"Banana", "quantity":null, "category":null}] \N +0 {"person0":{"full_name":"zero", "age":2, "gender":null}} {"country":null, "city":"cn", "population":1000000} [{"item":"Apple", "quantity":null, "category":null}, {"item":"Banana", "quantity":null, "category":null}] \N +1 \N \N \N \N +1 {"person1":{"full_name":"Alice", "age":25, "gender":null}} {"country":null, "city":"New York", "population":8000000} [{"item":"Apple", "quantity":null, "category":null}, {"item":"Banana", "quantity":null, "category":null}] \N +1 {"person1":{"full_name":"Alice", "age":25, "gender":null}} {"country":null, "city":"New York", "population":8000000} [{"item":"Apple", "quantity":null, "category":null}, {"item":"Banana", "quantity":null, "category":null}] \N +10 \N \N \N \N +11 \N \N \N \N +12 \N \N \N \N +13 \N \N \N \N +14 \N \N \N \N +15 \N \N \N \N +16 \N \N \N \N +17 \N \N \N \N +18 \N \N \N \N +19 \N \N \N \N +2 \N \N \N \N +2 {"person2":{"full_name":"Bob", "age":30, "gender":null}} {"country":null, "city":"Los Angeles", "population":4000000} [{"item":"Orange", "quantity":null, "category":null}, {"item":"Grape", "quantity":null, "category":null}] \N +2 {"person2":{"full_name":"Bob", "age":30, "gender":null}} 
{"country":null, "city":"Los Angeles", "population":4000000} [{"item":"Orange", "quantity":null, "category":null}, {"item":"Grape", "quantity":null, "category":null}] \N +20 \N \N \N \N +21 \N \N \N \N +3 \N \N \N \N +3 {"person3":{"full_name":"Charlie", "age":28, "gender":null}} {"country":null, "city":"Chicago", "population":2700000} [{"item":"Pear", "quantity":null, "category":null}, {"item":"Mango", "quantity":null, "category":null}] \N +3 {"person3":{"full_name":"Charlie", "age":28, "gender":null}} {"country":null, "city":"Chicago", "population":2700000} [{"item":"Pear", "quantity":null, "category":null}, {"item":"Mango", "quantity":null, "category":null}] \N +4 \N \N \N \N +4 {"person4":{"full_name":"David", "age":35, "gender":null}} {"country":null, "city":"Houston", "population":2300000} [{"item":"Kiwi", "quantity":null, "category":null}, {"item":"Pineapple", "quantity":null, "category":null}] \N +4 {"person4":{"full_name":"David", "age":35, "gender":null}} {"country":null, "city":"Houston", "population":2300000} [{"item":"Kiwi", "quantity":null, "category":null}, {"item":"Pineapple", "quantity":null, "category":null}] \N +5 \N \N \N \N +5 {"person5":{"full_name":"Eve", "age":40, "gender":null}} {"country":"USA", "city":"Phoenix", "population":1600000} [{"item":"Lemon", "quantity":null, "category":null}, {"item":"Lime", "quantity":null, "category":null}] \N +5 {"person5":{"full_name":"Eve", "age":40, "gender":null}} {"country":"USA", "city":"Phoenix", "population":1600000} [{"item":"Lemon", "quantity":null, "category":null}, {"item":"Lime", "quantity":null, "category":null}] \N +6 \N \N \N \N +6 {"person6":{"full_name":"Frank", "age":22, "gender":null}} {"country":"USA", "city":"Philadelphia", "population":1500000} [{"item":"Watermelon", "quantity":null, "category":null}, {"item":"Strawberry", "quantity":null, "category":null}] \N +6 {"person6":{"full_name":"Frank", "age":22, "gender":null}} {"country":"USA", "city":"Philadelphia", "population":1500000} 
[{"item":"Watermelon", "quantity":null, "category":null}, {"item":"Strawberry", "quantity":null, "category":null}] \N +7 \N \N \N \N +7 {"person7":{"full_name":"Grace", "age":27, "gender":null}} {"country":"USA", "city":"San Antonio", "population":1500000} [{"item":"Blueberry", "quantity":null, "category":null}, {"item":"Raspberry", "quantity":null, "category":null}] \N +7 {"person7":{"full_name":"Grace", "age":27, "gender":null}} {"country":"USA", "city":"San Antonio", "population":1500000} [{"item":"Blueberry", "quantity":null, "category":null}, {"item":"Raspberry", "quantity":null, "category":null}] \N +8 \N \N \N \N +9 \N \N \N \N + +-- !quantity_null_cols -- +0 \N \N +0 \N 2 +0 \N 2 +1 \N \N +1 \N 2 +1 \N 2 +10 \N \N +11 \N \N +12 \N \N +13 \N \N +14 \N \N +15 \N \N +16 \N \N +17 \N \N +18 \N \N +19 \N \N +2 \N \N +2 \N 2 +2 \N 2 +20 \N \N +21 \N \N +3 \N \N +3 \N 2 +3 \N 2 +4 \N \N +4 \N 2 +4 \N 2 +5 \N \N +5 USA 2 +5 USA 2 +6 \N \N +6 USA 2 +6 USA 2 +7 \N \N +7 USA 2 +7 USA 2 +8 \N \N +9 \N \N + +-- !struct2_not_null -- + +-- !struct2_not_null_cols -- + +-- !struct2_null -- +0 \N \N \N \N +0 {"person0":{"full_name":"zero", "age":2, "gender":null}} {"country":null, "city":"cn", "population":1000000} [{"item":"Apple", "quantity":null, "category":null}, {"item":"Banana", "quantity":null, "category":null}] \N +0 {"person0":{"full_name":"zero", "age":2, "gender":null}} {"country":null, "city":"cn", "population":1000000} [{"item":"Apple", "quantity":null, "category":null}, {"item":"Banana", "quantity":null, "category":null}] \N +1 \N \N \N \N +1 {"person1":{"full_name":"Alice", "age":25, "gender":null}} {"country":null, "city":"New York", "population":8000000} [{"item":"Apple", "quantity":null, "category":null}, {"item":"Banana", "quantity":null, "category":null}] \N +1 {"person1":{"full_name":"Alice", "age":25, "gender":null}} {"country":null, "city":"New York", "population":8000000} [{"item":"Apple", "quantity":null, "category":null}, {"item":"Banana", 
"quantity":null, "category":null}] \N +10 \N \N \N \N +10 {"person10":{"full_name":"Jack", "age":26, "gender":null}} {"country":"USA", "city":"Austin", "population":950000} [{"item":"Fig", "quantity":6, "category":null}, {"item":"Date", "quantity":7, "category":null}] \N +10 {"person10":{"full_name":"Jack", "age":26, "gender":null}} {"country":"USA", "city":"Austin", "population":950000} [{"item":"Fig", "quantity":6, "category":null}, {"item":"Date", "quantity":7, "category":null}] \N +11 \N \N \N \N +11 {"person11":{"full_name":"Karen", "age":31, "gender":"Female"}} {"country":"USA", "city":"Seattle", "population":750000} [{"item":"Coconut", "quantity":1, "category":null}, {"item":"Papaya", "quantity":2, "category":null}] \N +11 {"person11":{"full_name":"Karen", "age":31, "gender":"Female"}} {"country":"USA", "city":"Seattle", "population":750000} [{"item":"Coconut", "quantity":1, "category":null}, {"item":"Papaya", "quantity":2, "category":null}] \N +12 \N \N \N \N +12 {"person12":{"full_name":"Leo", "age":24, "gender":"Male"}} {"country":"USA", "city":"Portland", "population":650000} [{"item":"Guava", "quantity":3, "category":null}, {"item":"Lychee", "quantity":4, "category":null}] \N +12 {"person12":{"full_name":"Leo", "age":24, "gender":"Male"}} {"country":"USA", "city":"Portland", "population":650000} [{"item":"Guava", "quantity":3, "category":null}, {"item":"Lychee", "quantity":4, "category":null}] \N +13 \N \N \N \N +13 {"person13":{"full_name":"Mona", "age":33, "gender":"Female"}} {"country":"USA", "city":"Denver", "population":700000} [{"item":"Avocado", "quantity":2, "category":"Fruit"}, {"item":"Tomato", "quantity":5, "category":"Vegetable"}] \N +13 {"person13":{"full_name":"Mona", "age":33, "gender":"Female"}} {"country":"USA", "city":"Denver", "population":700000} [{"item":"Avocado", "quantity":2, "category":"Fruit"}, {"item":"Tomato", "quantity":5, "category":"Vegetable"}] \N +14 \N \N \N \N +14 {"person14":{"full_name":"Nina", "age":28, 
"gender":"Female"}} {"country":"USA", "city":"Miami", "population":450000} [{"item":"Cucumber", "quantity":6, "category":"Vegetable"}, {"item":"Carrot", "quantity":7, "category":"Vegetable"}] \N +14 {"person14":{"full_name":"Nina", "age":28, "gender":"Female"}} {"country":"USA", "city":"Miami", "population":450000} [{"item":"Cucumber", "quantity":6, "category":"Vegetable"}, {"item":"Carrot", "quantity":7, "category":"Vegetable"}] \N +15 \N \N \N \N +15 {"person15":{"full_name":"Emma Smith", "age":30, "gender":"Female"}} {"country":"USA", "city":"New York", "population":8000000} [{"item":"Banana", "quantity":3, "category":"Fruit"}, {"item":"Potato", "quantity":8, "category":"Vegetable"}] \N +15 {"person15":{"full_name":"Emma Smith", "age":30, "gender":"Female"}} {"country":"USA", "city":"New York", "population":8000000} [{"item":"Banana", "quantity":3, "category":"Fruit"}, {"item":"Potato", "quantity":8, "category":"Vegetable"}] \N +16 \N \N \N \N +17 \N \N \N \N +18 \N \N \N \N +19 \N \N \N \N +2 \N \N \N \N +2 {"person2":{"full_name":"Bob", "age":30, "gender":null}} {"country":null, "city":"Los Angeles", "population":4000000} [{"item":"Orange", "quantity":null, "category":null}, {"item":"Grape", "quantity":null, "category":null}] \N +2 {"person2":{"full_name":"Bob", "age":30, "gender":null}} {"country":null, "city":"Los Angeles", "population":4000000} [{"item":"Orange", "quantity":null, "category":null}, {"item":"Grape", "quantity":null, "category":null}] \N +20 \N \N \N \N +21 \N \N \N \N +3 \N \N \N \N +3 {"person3":{"full_name":"Charlie", "age":28, "gender":null}} {"country":null, "city":"Chicago", "population":2700000} [{"item":"Pear", "quantity":null, "category":null}, {"item":"Mango", "quantity":null, "category":null}] \N +3 {"person3":{"full_name":"Charlie", "age":28, "gender":null}} {"country":null, "city":"Chicago", "population":2700000} [{"item":"Pear", "quantity":null, "category":null}, {"item":"Mango", "quantity":null, "category":null}] \N +4 \N \N \N 
\N +4 {"person4":{"full_name":"David", "age":35, "gender":null}} {"country":null, "city":"Houston", "population":2300000} [{"item":"Kiwi", "quantity":null, "category":null}, {"item":"Pineapple", "quantity":null, "category":null}] \N +4 {"person4":{"full_name":"David", "age":35, "gender":null}} {"country":null, "city":"Houston", "population":2300000} [{"item":"Kiwi", "quantity":null, "category":null}, {"item":"Pineapple", "quantity":null, "category":null}] \N +5 \N \N \N \N +5 {"person5":{"full_name":"Eve", "age":40, "gender":null}} {"country":"USA", "city":"Phoenix", "population":1600000} [{"item":"Lemon", "quantity":null, "category":null}, {"item":"Lime", "quantity":null, "category":null}] \N +5 {"person5":{"full_name":"Eve", "age":40, "gender":null}} {"country":"USA", "city":"Phoenix", "population":1600000} [{"item":"Lemon", "quantity":null, "category":null}, {"item":"Lime", "quantity":null, "category":null}] \N +6 \N \N \N \N +6 {"person6":{"full_name":"Frank", "age":22, "gender":null}} {"country":"USA", "city":"Philadelphia", "population":1500000} [{"item":"Watermelon", "quantity":null, "category":null}, {"item":"Strawberry", "quantity":null, "category":null}] \N +6 {"person6":{"full_name":"Frank", "age":22, "gender":null}} {"country":"USA", "city":"Philadelphia", "population":1500000} [{"item":"Watermelon", "quantity":null, "category":null}, {"item":"Strawberry", "quantity":null, "category":null}] \N +7 \N \N \N \N +7 {"person7":{"full_name":"Grace", "age":27, "gender":null}} {"country":"USA", "city":"San Antonio", "population":1500000} [{"item":"Blueberry", "quantity":null, "category":null}, {"item":"Raspberry", "quantity":null, "category":null}] \N +7 {"person7":{"full_name":"Grace", "age":27, "gender":null}} {"country":"USA", "city":"San Antonio", "population":1500000} [{"item":"Blueberry", "quantity":null, "category":null}, {"item":"Raspberry", "quantity":null, "category":null}] \N +8 \N \N \N \N +8 {"person8":{"full_name":"Hank", "age":32, "gender":null}} 
{"country":"USA", "city":"San Diego", "population":1400000} [{"item":"Cherry", "quantity":5, "category":null}, {"item":"Plum", "quantity":3, "category":null}] \N +8 {"person8":{"full_name":"Hank", "age":32, "gender":null}} {"country":"USA", "city":"San Diego", "population":1400000} [{"item":"Cherry", "quantity":5, "category":null}, {"item":"Plum", "quantity":3, "category":null}] \N +9 \N \N \N \N +9 {"person9":{"full_name":"Ivy", "age":29, "gender":null}} {"country":"USA", "city":"Dallas", "population":1300000} [{"item":"Peach", "quantity":4, "category":null}, {"item":"Apricot", "quantity":2, "category":null}] \N +9 {"person9":{"full_name":"Ivy", "age":29, "gender":null}} {"country":"USA", "city":"Dallas", "population":1300000} [{"item":"Peach", "quantity":4, "category":null}, {"item":"Apricot", "quantity":2, "category":null}] \N + +-- !struct2_null_cols -- +0 \N +0 cn +0 cn +1 \N +1 New York +1 New York +10 \N +10 Austin +10 Austin +11 \N +11 Seattle +11 Seattle +12 \N +12 Portland +12 Portland +13 \N +13 Denver +13 Denver +14 \N +14 Miami +14 Miami +15 \N +15 New York +15 New York +16 \N +17 \N +18 \N +19 \N +2 \N +2 Los Angeles +2 Los Angeles +20 \N +21 \N +3 \N +3 Chicago +3 Chicago +4 \N +4 Houston +4 Houston +5 \N +5 Phoenix +5 Phoenix +6 \N +6 Philadelphia +6 Philadelphia +7 \N +7 San Antonio +7 San Antonio +8 \N +8 San Diego +8 San Diego +9 \N +9 Dallas +9 Dallas + +-- !cc_nested -- + +-- !cc_nested_cols -- + +-- !c_over_20 -- + +-- !c_over_20_cols -- + +-- !new_aa_50 -- + +-- !new_aa_50_cols -- + +-- !gender_female -- +11 {"person11":{"full_name":"Karen", "age":31, "gender":"Female"}} {"country":"USA", "city":"Seattle", "population":750000} [{"item":"Coconut", "quantity":1, "category":null}, {"item":"Papaya", "quantity":2, "category":null}] \N +11 {"person11":{"full_name":"Karen", "age":31, "gender":"Female"}} {"country":"USA", "city":"Seattle", "population":750000} [{"item":"Coconut", "quantity":1, "category":null}, {"item":"Papaya", "quantity":2, 
"category":null}] \N +13 {"person13":{"full_name":"Mona", "age":33, "gender":"Female"}} {"country":"USA", "city":"Denver", "population":700000} [{"item":"Avocado", "quantity":2, "category":"Fruit"}, {"item":"Tomato", "quantity":5, "category":"Vegetable"}] \N +13 {"person13":{"full_name":"Mona", "age":33, "gender":"Female"}} {"country":"USA", "city":"Denver", "population":700000} [{"item":"Avocado", "quantity":2, "category":"Fruit"}, {"item":"Tomato", "quantity":5, "category":"Vegetable"}] \N +14 {"person14":{"full_name":"Nina", "age":28, "gender":"Female"}} {"country":"USA", "city":"Miami", "population":450000} [{"item":"Cucumber", "quantity":6, "category":"Vegetable"}, {"item":"Carrot", "quantity":7, "category":"Vegetable"}] \N +14 {"person14":{"full_name":"Nina", "age":28, "gender":"Female"}} {"country":"USA", "city":"Miami", "population":450000} [{"item":"Cucumber", "quantity":6, "category":"Vegetable"}, {"item":"Carrot", "quantity":7, "category":"Vegetable"}] \N +15 {"person15":{"full_name":"Emma Smith", "age":30, "gender":"Female"}} {"country":"USA", "city":"New York", "population":8000000} [{"item":"Banana", "quantity":3, "category":"Fruit"}, {"item":"Potato", "quantity":8, "category":"Vegetable"}] \N +15 {"person15":{"full_name":"Emma Smith", "age":30, "gender":"Female"}} {"country":"USA", "city":"New York", "population":8000000} [{"item":"Banana", "quantity":3, "category":"Fruit"}, {"item":"Potato", "quantity":8, "category":"Vegetable"}] \N + +-- !gender_female_cols -- +11 Seattle 2 +11 Seattle 2 +13 Denver 2 +13 Denver 2 +14 Miami 2 +14 Miami 2 +15 New York 2 +15 New York 2 + +-- !category_fruit -- + +-- !category_fruit_cols -- + +-- !category_vegetable -- +13 {"person13":{"full_name":"Mona", "age":33, "gender":"Female"}} {"country":"USA", "city":"Denver", "population":700000} [{"item":"Avocado", "quantity":2, "category":"Fruit"}, {"item":"Tomato", "quantity":5, "category":"Vegetable"}] \N +13 {"person13":{"full_name":"Mona", "age":33, "gender":"Female"}} 
{"country":"USA", "city":"Denver", "population":700000} [{"item":"Avocado", "quantity":2, "category":"Fruit"}, {"item":"Tomato", "quantity":5, "category":"Vegetable"}] \N +14 {"person14":{"full_name":"Nina", "age":28, "gender":"Female"}} {"country":"USA", "city":"Miami", "population":450000} [{"item":"Cucumber", "quantity":6, "category":"Vegetable"}, {"item":"Carrot", "quantity":7, "category":"Vegetable"}] \N +14 {"person14":{"full_name":"Nina", "age":28, "gender":"Female"}} {"country":"USA", "city":"Miami", "population":450000} [{"item":"Cucumber", "quantity":6, "category":"Vegetable"}, {"item":"Carrot", "quantity":7, "category":"Vegetable"}] \N +15 {"person15":{"full_name":"Emma Smith", "age":30, "gender":"Female"}} {"country":"USA", "city":"New York", "population":8000000} [{"item":"Banana", "quantity":3, "category":"Fruit"}, {"item":"Potato", "quantity":8, "category":"Vegetable"}] \N +15 {"person15":{"full_name":"Emma Smith", "age":30, "gender":"Female"}} {"country":"USA", "city":"New York", "population":8000000} [{"item":"Banana", "quantity":3, "category":"Fruit"}, {"item":"Potato", "quantity":8, "category":"Vegetable"}] \N + +-- !category_vegetable_cols -- +13 Denver 33 +13 Denver 33 +14 Miami 28 +14 Miami 28 +15 New York 30 +15 New York 30 + +-- !all -- +0 \N \N \N \N +0 {"person0":{"full_name":"zero", "age":2, "gender":null}} {"country":null, "city":"cn", "population":1000000} [{"item":"Apple", "quantity":null, "category":null}, {"item":"Banana", "quantity":null, "category":null}] \N +0 {"person0":{"full_name":"zero", "age":2, "gender":null}} {"country":null, "city":"cn", "population":1000000} [{"item":"Apple", "quantity":null, "category":null}, {"item":"Banana", "quantity":null, "category":null}] \N +1 \N \N \N \N +1 {"person1":{"full_name":"Alice", "age":25, "gender":null}} {"country":null, "city":"New York", "population":8000000} [{"item":"Apple", "quantity":null, "category":null}, {"item":"Banana", "quantity":null, "category":null}] \N +1 
{"person1":{"full_name":"Alice", "age":25, "gender":null}} {"country":null, "city":"New York", "population":8000000} [{"item":"Apple", "quantity":null, "category":null}, {"item":"Banana", "quantity":null, "category":null}] \N +10 \N \N \N \N +10 {"person10":{"full_name":"Jack", "age":26, "gender":null}} {"country":"USA", "city":"Austin", "population":950000} [{"item":"Fig", "quantity":6, "category":null}, {"item":"Date", "quantity":7, "category":null}] \N +10 {"person10":{"full_name":"Jack", "age":26, "gender":null}} {"country":"USA", "city":"Austin", "population":950000} [{"item":"Fig", "quantity":6, "category":null}, {"item":"Date", "quantity":7, "category":null}] \N +11 \N \N \N \N +11 {"person11":{"full_name":"Karen", "age":31, "gender":"Female"}} {"country":"USA", "city":"Seattle", "population":750000} [{"item":"Coconut", "quantity":1, "category":null}, {"item":"Papaya", "quantity":2, "category":null}] \N +11 {"person11":{"full_name":"Karen", "age":31, "gender":"Female"}} {"country":"USA", "city":"Seattle", "population":750000} [{"item":"Coconut", "quantity":1, "category":null}, {"item":"Papaya", "quantity":2, "category":null}] \N +12 \N \N \N \N +12 {"person12":{"full_name":"Leo", "age":24, "gender":"Male"}} {"country":"USA", "city":"Portland", "population":650000} [{"item":"Guava", "quantity":3, "category":null}, {"item":"Lychee", "quantity":4, "category":null}] \N +12 {"person12":{"full_name":"Leo", "age":24, "gender":"Male"}} {"country":"USA", "city":"Portland", "population":650000} [{"item":"Guava", "quantity":3, "category":null}, {"item":"Lychee", "quantity":4, "category":null}] \N +13 \N \N \N \N +13 {"person13":{"full_name":"Mona", "age":33, "gender":"Female"}} {"country":"USA", "city":"Denver", "population":700000} [{"item":"Avocado", "quantity":2, "category":"Fruit"}, {"item":"Tomato", "quantity":5, "category":"Vegetable"}] \N +13 {"person13":{"full_name":"Mona", "age":33, "gender":"Female"}} {"country":"USA", "city":"Denver", "population":700000} 
[{"item":"Avocado", "quantity":2, "category":"Fruit"}, {"item":"Tomato", "quantity":5, "category":"Vegetable"}] \N +14 \N \N \N \N +14 {"person14":{"full_name":"Nina", "age":28, "gender":"Female"}} {"country":"USA", "city":"Miami", "population":450000} [{"item":"Cucumber", "quantity":6, "category":"Vegetable"}, {"item":"Carrot", "quantity":7, "category":"Vegetable"}] \N +14 {"person14":{"full_name":"Nina", "age":28, "gender":"Female"}} {"country":"USA", "city":"Miami", "population":450000} [{"item":"Cucumber", "quantity":6, "category":"Vegetable"}, {"item":"Carrot", "quantity":7, "category":"Vegetable"}] \N +15 \N \N \N \N +15 {"person15":{"full_name":"Emma Smith", "age":30, "gender":"Female"}} {"country":"USA", "city":"New York", "population":8000000} [{"item":"Banana", "quantity":3, "category":"Fruit"}, {"item":"Potato", "quantity":8, "category":"Vegetable"}] \N +15 {"person15":{"full_name":"Emma Smith", "age":30, "gender":"Female"}} {"country":"USA", "city":"New York", "population":8000000} [{"item":"Banana", "quantity":3, "category":"Fruit"}, {"item":"Potato", "quantity":8, "category":"Vegetable"}] \N +16 \N \N \N \N +17 \N \N \N \N +18 \N \N \N \N +19 \N \N \N \N +2 \N \N \N \N +2 {"person2":{"full_name":"Bob", "age":30, "gender":null}} {"country":null, "city":"Los Angeles", "population":4000000} [{"item":"Orange", "quantity":null, "category":null}, {"item":"Grape", "quantity":null, "category":null}] \N +2 {"person2":{"full_name":"Bob", "age":30, "gender":null}} {"country":null, "city":"Los Angeles", "population":4000000} [{"item":"Orange", "quantity":null, "category":null}, {"item":"Grape", "quantity":null, "category":null}] \N +20 \N \N \N \N +21 \N \N \N \N +3 \N \N \N \N +3 {"person3":{"full_name":"Charlie", "age":28, "gender":null}} {"country":null, "city":"Chicago", "population":2700000} [{"item":"Pear", "quantity":null, "category":null}, {"item":"Mango", "quantity":null, "category":null}] \N +3 {"person3":{"full_name":"Charlie", "age":28, 
"gender":null}} {"country":null, "city":"Chicago", "population":2700000} [{"item":"Pear", "quantity":null, "category":null}, {"item":"Mango", "quantity":null, "category":null}] \N +4 \N \N \N \N +4 {"person4":{"full_name":"David", "age":35, "gender":null}} {"country":null, "city":"Houston", "population":2300000} [{"item":"Kiwi", "quantity":null, "category":null}, {"item":"Pineapple", "quantity":null, "category":null}] \N +4 {"person4":{"full_name":"David", "age":35, "gender":null}} {"country":null, "city":"Houston", "population":2300000} [{"item":"Kiwi", "quantity":null, "category":null}, {"item":"Pineapple", "quantity":null, "category":null}] \N +5 \N \N \N \N +5 {"person5":{"full_name":"Eve", "age":40, "gender":null}} {"country":"USA", "city":"Phoenix", "population":1600000} [{"item":"Lemon", "quantity":null, "category":null}, {"item":"Lime", "quantity":null, "category":null}] \N +5 {"person5":{"full_name":"Eve", "age":40, "gender":null}} {"country":"USA", "city":"Phoenix", "population":1600000} [{"item":"Lemon", "quantity":null, "category":null}, {"item":"Lime", "quantity":null, "category":null}] \N +6 \N \N \N \N +6 {"person6":{"full_name":"Frank", "age":22, "gender":null}} {"country":"USA", "city":"Philadelphia", "population":1500000} [{"item":"Watermelon", "quantity":null, "category":null}, {"item":"Strawberry", "quantity":null, "category":null}] \N +6 {"person6":{"full_name":"Frank", "age":22, "gender":null}} {"country":"USA", "city":"Philadelphia", "population":1500000} [{"item":"Watermelon", "quantity":null, "category":null}, {"item":"Strawberry", "quantity":null, "category":null}] \N +7 \N \N \N \N +7 {"person7":{"full_name":"Grace", "age":27, "gender":null}} {"country":"USA", "city":"San Antonio", "population":1500000} [{"item":"Blueberry", "quantity":null, "category":null}, {"item":"Raspberry", "quantity":null, "category":null}] \N +7 {"person7":{"full_name":"Grace", "age":27, "gender":null}} {"country":"USA", "city":"San Antonio", "population":1500000} 
[{"item":"Blueberry", "quantity":null, "category":null}, {"item":"Raspberry", "quantity":null, "category":null}] \N +8 \N \N \N \N +8 {"person8":{"full_name":"Hank", "age":32, "gender":null}} {"country":"USA", "city":"San Diego", "population":1400000} [{"item":"Cherry", "quantity":5, "category":null}, {"item":"Plum", "quantity":3, "category":null}] \N +8 {"person8":{"full_name":"Hank", "age":32, "gender":null}} {"country":"USA", "city":"San Diego", "population":1400000} [{"item":"Cherry", "quantity":5, "category":null}, {"item":"Plum", "quantity":3, "category":null}] \N +9 \N \N \N \N +9 {"person9":{"full_name":"Ivy", "age":29, "gender":null}} {"country":"USA", "city":"Dallas", "population":1300000} [{"item":"Peach", "quantity":4, "category":null}, {"item":"Apricot", "quantity":2, "category":null}] \N +9 {"person9":{"full_name":"Ivy", "age":29, "gender":null}} {"country":"USA", "city":"Dallas", "population":1300000} [{"item":"Peach", "quantity":4, "category":null}, {"item":"Apricot", "quantity":2, "category":null}] \N + +-- !country_usa -- +10 {"person10":{"full_name":"Jack", "age":26, "gender":null}} {"country":"USA", "city":"Austin", "population":950000} [{"item":"Fig", "quantity":6, "category":null}, {"item":"Date", "quantity":7, "category":null}] \N +10 {"person10":{"full_name":"Jack", "age":26, "gender":null}} {"country":"USA", "city":"Austin", "population":950000} [{"item":"Fig", "quantity":6, "category":null}, {"item":"Date", "quantity":7, "category":null}] \N +11 {"person11":{"full_name":"Karen", "age":31, "gender":"Female"}} {"country":"USA", "city":"Seattle", "population":750000} [{"item":"Coconut", "quantity":1, "category":null}, {"item":"Papaya", "quantity":2, "category":null}] \N +11 {"person11":{"full_name":"Karen", "age":31, "gender":"Female"}} {"country":"USA", "city":"Seattle", "population":750000} [{"item":"Coconut", "quantity":1, "category":null}, {"item":"Papaya", "quantity":2, "category":null}] \N +12 {"person12":{"full_name":"Leo", "age":24, 
"gender":"Male"}} {"country":"USA", "city":"Portland", "population":650000} [{"item":"Guava", "quantity":3, "category":null}, {"item":"Lychee", "quantity":4, "category":null}] \N +12 {"person12":{"full_name":"Leo", "age":24, "gender":"Male"}} {"country":"USA", "city":"Portland", "population":650000} [{"item":"Guava", "quantity":3, "category":null}, {"item":"Lychee", "quantity":4, "category":null}] \N +13 {"person13":{"full_name":"Mona", "age":33, "gender":"Female"}} {"country":"USA", "city":"Denver", "population":700000} [{"item":"Avocado", "quantity":2, "category":"Fruit"}, {"item":"Tomato", "quantity":5, "category":"Vegetable"}] \N +13 {"person13":{"full_name":"Mona", "age":33, "gender":"Female"}} {"country":"USA", "city":"Denver", "population":700000} [{"item":"Avocado", "quantity":2, "category":"Fruit"}, {"item":"Tomato", "quantity":5, "category":"Vegetable"}] \N +14 {"person14":{"full_name":"Nina", "age":28, "gender":"Female"}} {"country":"USA", "city":"Miami", "population":450000} [{"item":"Cucumber", "quantity":6, "category":"Vegetable"}, {"item":"Carrot", "quantity":7, "category":"Vegetable"}] \N +14 {"person14":{"full_name":"Nina", "age":28, "gender":"Female"}} {"country":"USA", "city":"Miami", "population":450000} [{"item":"Cucumber", "quantity":6, "category":"Vegetable"}, {"item":"Carrot", "quantity":7, "category":"Vegetable"}] \N +15 {"person15":{"full_name":"Emma Smith", "age":30, "gender":"Female"}} {"country":"USA", "city":"New York", "population":8000000} [{"item":"Banana", "quantity":3, "category":"Fruit"}, {"item":"Potato", "quantity":8, "category":"Vegetable"}] \N +15 {"person15":{"full_name":"Emma Smith", "age":30, "gender":"Female"}} {"country":"USA", "city":"New York", "population":8000000} [{"item":"Banana", "quantity":3, "category":"Fruit"}, {"item":"Potato", "quantity":8, "category":"Vegetable"}] \N +5 {"person5":{"full_name":"Eve", "age":40, "gender":null}} {"country":"USA", "city":"Phoenix", "population":1600000} [{"item":"Lemon", 
"quantity":null, "category":null}, {"item":"Lime", "quantity":null, "category":null}] \N +5 {"person5":{"full_name":"Eve", "age":40, "gender":null}} {"country":"USA", "city":"Phoenix", "population":1600000} [{"item":"Lemon", "quantity":null, "category":null}, {"item":"Lime", "quantity":null, "category":null}] \N +6 {"person6":{"full_name":"Frank", "age":22, "gender":null}} {"country":"USA", "city":"Philadelphia", "population":1500000} [{"item":"Watermelon", "quantity":null, "category":null}, {"item":"Strawberry", "quantity":null, "category":null}] \N +6 {"person6":{"full_name":"Frank", "age":22, "gender":null}} {"country":"USA", "city":"Philadelphia", "population":1500000} [{"item":"Watermelon", "quantity":null, "category":null}, {"item":"Strawberry", "quantity":null, "category":null}] \N +7 {"person7":{"full_name":"Grace", "age":27, "gender":null}} {"country":"USA", "city":"San Antonio", "population":1500000} [{"item":"Blueberry", "quantity":null, "category":null}, {"item":"Raspberry", "quantity":null, "category":null}] \N +7 {"person7":{"full_name":"Grace", "age":27, "gender":null}} {"country":"USA", "city":"San Antonio", "population":1500000} [{"item":"Blueberry", "quantity":null, "category":null}, {"item":"Raspberry", "quantity":null, "category":null}] \N +8 {"person8":{"full_name":"Hank", "age":32, "gender":null}} {"country":"USA", "city":"San Diego", "population":1400000} [{"item":"Cherry", "quantity":5, "category":null}, {"item":"Plum", "quantity":3, "category":null}] \N +8 {"person8":{"full_name":"Hank", "age":32, "gender":null}} {"country":"USA", "city":"San Diego", "population":1400000} [{"item":"Cherry", "quantity":5, "category":null}, {"item":"Plum", "quantity":3, "category":null}] \N +9 {"person9":{"full_name":"Ivy", "age":29, "gender":null}} {"country":"USA", "city":"Dallas", "population":1300000} [{"item":"Peach", "quantity":4, "category":null}, {"item":"Apricot", "quantity":2, "category":null}] \N +9 {"person9":{"full_name":"Ivy", "age":29, 
"gender":null}} {"country":"USA", "city":"Dallas", "population":1300000} [{"item":"Peach", "quantity":4, "category":null}, {"item":"Apricot", "quantity":2, "category":null}] \N + +-- !country_usa_cols -- +10 Austin Jack 2 +10 Austin Jack 2 +11 Seattle Karen 2 +11 Seattle Karen 2 +12 Portland Leo 2 +12 Portland Leo 2 +13 Denver Mona 2 +13 Denver Mona 2 +14 Miami Nina 2 +14 Miami Nina 2 +15 New York Emma Smith 2 +15 New York Emma Smith 2 +5 Phoenix Eve 2 +5 Phoenix Eve 2 +6 Philadelphia Frank 2 +6 Philadelphia Frank 2 +7 San Antonio Grace 2 +7 San Antonio Grace 2 +8 San Diego Hank 2 +8 San Diego Hank 2 +9 Dallas Ivy 2 +9 Dallas Ivy 2 + +-- !city_new -- +1 {"person1":{"full_name":"Alice", "age":25, "gender":null}} {"country":null, "city":"New York", "population":8000000} [{"item":"Apple", "quantity":null, "category":null}, {"item":"Banana", "quantity":null, "category":null}] \N +1 {"person1":{"full_name":"Alice", "age":25, "gender":null}} {"country":null, "city":"New York", "population":8000000} [{"item":"Apple", "quantity":null, "category":null}, {"item":"Banana", "quantity":null, "category":null}] \N +15 {"person15":{"full_name":"Emma Smith", "age":30, "gender":"Female"}} {"country":"USA", "city":"New York", "population":8000000} [{"item":"Banana", "quantity":3, "category":"Fruit"}, {"item":"Potato", "quantity":8, "category":"Vegetable"}] \N +15 {"person15":{"full_name":"Emma Smith", "age":30, "gender":"Female"}} {"country":"USA", "city":"New York", "population":8000000} [{"item":"Banana", "quantity":3, "category":"Fruit"}, {"item":"Potato", "quantity":8, "category":"Vegetable"}] \N + +-- !city_new_cols -- +1 \N 25 Apple +1 \N 25 Apple +15 USA 30 Banana +15 USA 30 Banana + +-- !age_over_30 -- +11 {"person11":{"full_name":"Karen", "age":31, "gender":"Female"}} {"country":"USA", "city":"Seattle", "population":750000} [{"item":"Coconut", "quantity":1, "category":null}, {"item":"Papaya", "quantity":2, "category":null}] \N +11 {"person11":{"full_name":"Karen", "age":31, 
"gender":"Female"}} {"country":"USA", "city":"Seattle", "population":750000} [{"item":"Coconut", "quantity":1, "category":null}, {"item":"Papaya", "quantity":2, "category":null}] \N +13 {"person13":{"full_name":"Mona", "age":33, "gender":"Female"}} {"country":"USA", "city":"Denver", "population":700000} [{"item":"Avocado", "quantity":2, "category":"Fruit"}, {"item":"Tomato", "quantity":5, "category":"Vegetable"}] \N +13 {"person13":{"full_name":"Mona", "age":33, "gender":"Female"}} {"country":"USA", "city":"Denver", "population":700000} [{"item":"Avocado", "quantity":2, "category":"Fruit"}, {"item":"Tomato", "quantity":5, "category":"Vegetable"}] \N +4 {"person4":{"full_name":"David", "age":35, "gender":null}} {"country":null, "city":"Houston", "population":2300000} [{"item":"Kiwi", "quantity":null, "category":null}, {"item":"Pineapple", "quantity":null, "category":null}] \N +4 {"person4":{"full_name":"David", "age":35, "gender":null}} {"country":null, "city":"Houston", "population":2300000} [{"item":"Kiwi", "quantity":null, "category":null}, {"item":"Pineapple", "quantity":null, "category":null}] \N +5 {"person5":{"full_name":"Eve", "age":40, "gender":null}} {"country":"USA", "city":"Phoenix", "population":1600000} [{"item":"Lemon", "quantity":null, "category":null}, {"item":"Lime", "quantity":null, "category":null}] \N +5 {"person5":{"full_name":"Eve", "age":40, "gender":null}} {"country":"USA", "city":"Phoenix", "population":1600000} [{"item":"Lemon", "quantity":null, "category":null}, {"item":"Lime", "quantity":null, "category":null}] \N +8 {"person8":{"full_name":"Hank", "age":32, "gender":null}} {"country":"USA", "city":"San Diego", "population":1400000} [{"item":"Cherry", "quantity":5, "category":null}, {"item":"Plum", "quantity":3, "category":null}] \N +8 {"person8":{"full_name":"Hank", "age":32, "gender":null}} {"country":"USA", "city":"San Diego", "population":1400000} [{"item":"Cherry", "quantity":5, "category":null}, {"item":"Plum", "quantity":3, 
"category":null}] \N + +-- !age_over_30_cols -- +11 Seattle \N +11 Seattle \N +13 Denver Vegetable +13 Denver Vegetable +4 Houston \N +4 Houston \N +5 Phoenix \N +5 Phoenix \N +8 San Diego \N +8 San Diego \N + +-- !age_under_25 -- +0 {"person0":{"full_name":"zero", "age":2, "gender":null}} {"country":null, "city":"cn", "population":1000000} [{"item":"Apple", "quantity":null, "category":null}, {"item":"Banana", "quantity":null, "category":null}] \N +0 {"person0":{"full_name":"zero", "age":2, "gender":null}} {"country":null, "city":"cn", "population":1000000} [{"item":"Apple", "quantity":null, "category":null}, {"item":"Banana", "quantity":null, "category":null}] \N +12 {"person12":{"full_name":"Leo", "age":24, "gender":"Male"}} {"country":"USA", "city":"Portland", "population":650000} [{"item":"Guava", "quantity":3, "category":null}, {"item":"Lychee", "quantity":4, "category":null}] \N +12 {"person12":{"full_name":"Leo", "age":24, "gender":"Male"}} {"country":"USA", "city":"Portland", "population":650000} [{"item":"Guava", "quantity":3, "category":null}, {"item":"Lychee", "quantity":4, "category":null}] \N +6 {"person6":{"full_name":"Frank", "age":22, "gender":null}} {"country":"USA", "city":"Philadelphia", "population":1500000} [{"item":"Watermelon", "quantity":null, "category":null}, {"item":"Strawberry", "quantity":null, "category":null}] \N +6 {"person6":{"full_name":"Frank", "age":22, "gender":null}} {"country":"USA", "city":"Philadelphia", "population":1500000} [{"item":"Watermelon", "quantity":null, "category":null}, {"item":"Strawberry", "quantity":null, "category":null}] \N + +-- !age_under_25_cols -- +0 \N person0 +0 \N person0 +12 USA person12 +12 USA person12 +6 USA person6 +6 USA person6 + +-- !name_alice -- +1 {"person1":{"full_name":"Alice", "age":25, "gender":null}} {"country":null, "city":"New York", "population":8000000} [{"item":"Apple", "quantity":null, "category":null}, {"item":"Banana", "quantity":null, "category":null}] \N +1 
{"person1":{"full_name":"Alice", "age":25, "gender":null}} {"country":null, "city":"New York", "population":8000000} [{"item":"Apple", "quantity":null, "category":null}, {"item":"Banana", "quantity":null, "category":null}] \N + +-- !name_alice_cols -- +1 New York 2 +1 New York 2 + +-- !name_j -- +10 {"person10":{"full_name":"Jack", "age":26, "gender":null}} {"country":"USA", "city":"Austin", "population":950000} [{"item":"Fig", "quantity":6, "category":null}, {"item":"Date", "quantity":7, "category":null}] \N +10 {"person10":{"full_name":"Jack", "age":26, "gender":null}} {"country":"USA", "city":"Austin", "population":950000} [{"item":"Fig", "quantity":6, "category":null}, {"item":"Date", "quantity":7, "category":null}] \N + +-- !name_j_cols -- +10 USA \N +10 USA \N + +-- !map_person5 -- +5 {"person5":{"full_name":"Eve", "age":40, "gender":null}} {"country":"USA", "city":"Phoenix", "population":1600000} [{"item":"Lemon", "quantity":null, "category":null}, {"item":"Lime", "quantity":null, "category":null}] \N +5 {"person5":{"full_name":"Eve", "age":40, "gender":null}} {"country":"USA", "city":"Phoenix", "population":1600000} [{"item":"Lemon", "quantity":null, "category":null}, {"item":"Lime", "quantity":null, "category":null}] \N + +-- !map_person5_cols -- +5 Phoenix 40 +5 Phoenix 40 + +-- !array_size_2 -- +0 {"person0":{"full_name":"zero", "age":2, "gender":null}} {"country":null, "city":"cn", "population":1000000} [{"item":"Apple", "quantity":null, "category":null}, {"item":"Banana", "quantity":null, "category":null}] \N +0 {"person0":{"full_name":"zero", "age":2, "gender":null}} {"country":null, "city":"cn", "population":1000000} [{"item":"Apple", "quantity":null, "category":null}, {"item":"Banana", "quantity":null, "category":null}] \N +1 {"person1":{"full_name":"Alice", "age":25, "gender":null}} {"country":null, "city":"New York", "population":8000000} [{"item":"Apple", "quantity":null, "category":null}, {"item":"Banana", "quantity":null, "category":null}] \N 
+1 {"person1":{"full_name":"Alice", "age":25, "gender":null}} {"country":null, "city":"New York", "population":8000000} [{"item":"Apple", "quantity":null, "category":null}, {"item":"Banana", "quantity":null, "category":null}] \N +10 {"person10":{"full_name":"Jack", "age":26, "gender":null}} {"country":"USA", "city":"Austin", "population":950000} [{"item":"Fig", "quantity":6, "category":null}, {"item":"Date", "quantity":7, "category":null}] \N +10 {"person10":{"full_name":"Jack", "age":26, "gender":null}} {"country":"USA", "city":"Austin", "population":950000} [{"item":"Fig", "quantity":6, "category":null}, {"item":"Date", "quantity":7, "category":null}] \N +11 {"person11":{"full_name":"Karen", "age":31, "gender":"Female"}} {"country":"USA", "city":"Seattle", "population":750000} [{"item":"Coconut", "quantity":1, "category":null}, {"item":"Papaya", "quantity":2, "category":null}] \N +11 {"person11":{"full_name":"Karen", "age":31, "gender":"Female"}} {"country":"USA", "city":"Seattle", "population":750000} [{"item":"Coconut", "quantity":1, "category":null}, {"item":"Papaya", "quantity":2, "category":null}] \N +12 {"person12":{"full_name":"Leo", "age":24, "gender":"Male"}} {"country":"USA", "city":"Portland", "population":650000} [{"item":"Guava", "quantity":3, "category":null}, {"item":"Lychee", "quantity":4, "category":null}] \N +12 {"person12":{"full_name":"Leo", "age":24, "gender":"Male"}} {"country":"USA", "city":"Portland", "population":650000} [{"item":"Guava", "quantity":3, "category":null}, {"item":"Lychee", "quantity":4, "category":null}] \N +13 {"person13":{"full_name":"Mona", "age":33, "gender":"Female"}} {"country":"USA", "city":"Denver", "population":700000} [{"item":"Avocado", "quantity":2, "category":"Fruit"}, {"item":"Tomato", "quantity":5, "category":"Vegetable"}] \N +13 {"person13":{"full_name":"Mona", "age":33, "gender":"Female"}} {"country":"USA", "city":"Denver", "population":700000} [{"item":"Avocado", "quantity":2, "category":"Fruit"}, 
{"item":"Tomato", "quantity":5, "category":"Vegetable"}] \N +14 {"person14":{"full_name":"Nina", "age":28, "gender":"Female"}} {"country":"USA", "city":"Miami", "population":450000} [{"item":"Cucumber", "quantity":6, "category":"Vegetable"}, {"item":"Carrot", "quantity":7, "category":"Vegetable"}] \N +14 {"person14":{"full_name":"Nina", "age":28, "gender":"Female"}} {"country":"USA", "city":"Miami", "population":450000} [{"item":"Cucumber", "quantity":6, "category":"Vegetable"}, {"item":"Carrot", "quantity":7, "category":"Vegetable"}] \N +15 {"person15":{"full_name":"Emma Smith", "age":30, "gender":"Female"}} {"country":"USA", "city":"New York", "population":8000000} [{"item":"Banana", "quantity":3, "category":"Fruit"}, {"item":"Potato", "quantity":8, "category":"Vegetable"}] \N +15 {"person15":{"full_name":"Emma Smith", "age":30, "gender":"Female"}} {"country":"USA", "city":"New York", "population":8000000} [{"item":"Banana", "quantity":3, "category":"Fruit"}, {"item":"Potato", "quantity":8, "category":"Vegetable"}] \N +2 {"person2":{"full_name":"Bob", "age":30, "gender":null}} {"country":null, "city":"Los Angeles", "population":4000000} [{"item":"Orange", "quantity":null, "category":null}, {"item":"Grape", "quantity":null, "category":null}] \N +2 {"person2":{"full_name":"Bob", "age":30, "gender":null}} {"country":null, "city":"Los Angeles", "population":4000000} [{"item":"Orange", "quantity":null, "category":null}, {"item":"Grape", "quantity":null, "category":null}] \N +3 {"person3":{"full_name":"Charlie", "age":28, "gender":null}} {"country":null, "city":"Chicago", "population":2700000} [{"item":"Pear", "quantity":null, "category":null}, {"item":"Mango", "quantity":null, "category":null}] \N +3 {"person3":{"full_name":"Charlie", "age":28, "gender":null}} {"country":null, "city":"Chicago", "population":2700000} [{"item":"Pear", "quantity":null, "category":null}, {"item":"Mango", "quantity":null, "category":null}] \N +4 {"person4":{"full_name":"David", "age":35, 
"gender":null}} {"country":null, "city":"Houston", "population":2300000} [{"item":"Kiwi", "quantity":null, "category":null}, {"item":"Pineapple", "quantity":null, "category":null}] \N +4 {"person4":{"full_name":"David", "age":35, "gender":null}} {"country":null, "city":"Houston", "population":2300000} [{"item":"Kiwi", "quantity":null, "category":null}, {"item":"Pineapple", "quantity":null, "category":null}] \N +5 {"person5":{"full_name":"Eve", "age":40, "gender":null}} {"country":"USA", "city":"Phoenix", "population":1600000} [{"item":"Lemon", "quantity":null, "category":null}, {"item":"Lime", "quantity":null, "category":null}] \N +5 {"person5":{"full_name":"Eve", "age":40, "gender":null}} {"country":"USA", "city":"Phoenix", "population":1600000} [{"item":"Lemon", "quantity":null, "category":null}, {"item":"Lime", "quantity":null, "category":null}] \N +6 {"person6":{"full_name":"Frank", "age":22, "gender":null}} {"country":"USA", "city":"Philadelphia", "population":1500000} [{"item":"Watermelon", "quantity":null, "category":null}, {"item":"Strawberry", "quantity":null, "category":null}] \N +6 {"person6":{"full_name":"Frank", "age":22, "gender":null}} {"country":"USA", "city":"Philadelphia", "population":1500000} [{"item":"Watermelon", "quantity":null, "category":null}, {"item":"Strawberry", "quantity":null, "category":null}] \N +7 {"person7":{"full_name":"Grace", "age":27, "gender":null}} {"country":"USA", "city":"San Antonio", "population":1500000} [{"item":"Blueberry", "quantity":null, "category":null}, {"item":"Raspberry", "quantity":null, "category":null}] \N +7 {"person7":{"full_name":"Grace", "age":27, "gender":null}} {"country":"USA", "city":"San Antonio", "population":1500000} [{"item":"Blueberry", "quantity":null, "category":null}, {"item":"Raspberry", "quantity":null, "category":null}] \N +8 {"person8":{"full_name":"Hank", "age":32, "gender":null}} {"country":"USA", "city":"San Diego", "population":1400000} [{"item":"Cherry", "quantity":5, 
"category":null}, {"item":"Plum", "quantity":3, "category":null}] \N +8 {"person8":{"full_name":"Hank", "age":32, "gender":null}} {"country":"USA", "city":"San Diego", "population":1400000} [{"item":"Cherry", "quantity":5, "category":null}, {"item":"Plum", "quantity":3, "category":null}] \N +9 {"person9":{"full_name":"Ivy", "age":29, "gender":null}} {"country":"USA", "city":"Dallas", "population":1300000} [{"item":"Peach", "quantity":4, "category":null}, {"item":"Apricot", "quantity":2, "category":null}] \N +9 {"person9":{"full_name":"Ivy", "age":29, "gender":null}} {"country":"USA", "city":"Dallas", "population":1300000} [{"item":"Peach", "quantity":4, "category":null}, {"item":"Apricot", "quantity":2, "category":null}] \N + +-- !array_size_2_cols -- +0 \N \N +0 \N \N +1 \N \N +1 \N \N +10 USA \N +10 USA \N +11 USA \N +11 USA \N +12 USA \N +12 USA \N +13 USA \N +13 USA \N +14 USA \N +14 USA \N +15 USA \N +15 USA \N +2 \N \N +2 \N \N +3 \N \N +3 \N \N +4 \N \N +4 \N \N +5 USA \N +5 USA \N +6 USA \N +6 USA \N +7 USA \N +7 USA \N +8 USA \N +8 USA \N +9 USA \N +9 USA \N + +-- !quantity_not_null -- +10 {"person10":{"full_name":"Jack", "age":26, "gender":null}} {"country":"USA", "city":"Austin", "population":950000} [{"item":"Fig", "quantity":6, "category":null}, {"item":"Date", "quantity":7, "category":null}] \N +10 {"person10":{"full_name":"Jack", "age":26, "gender":null}} {"country":"USA", "city":"Austin", "population":950000} [{"item":"Fig", "quantity":6, "category":null}, {"item":"Date", "quantity":7, "category":null}] \N +11 {"person11":{"full_name":"Karen", "age":31, "gender":"Female"}} {"country":"USA", "city":"Seattle", "population":750000} [{"item":"Coconut", "quantity":1, "category":null}, {"item":"Papaya", "quantity":2, "category":null}] \N +11 {"person11":{"full_name":"Karen", "age":31, "gender":"Female"}} {"country":"USA", "city":"Seattle", "population":750000} [{"item":"Coconut", "quantity":1, "category":null}, {"item":"Papaya", "quantity":2, 
"category":null}] \N +12 {"person12":{"full_name":"Leo", "age":24, "gender":"Male"}} {"country":"USA", "city":"Portland", "population":650000} [{"item":"Guava", "quantity":3, "category":null}, {"item":"Lychee", "quantity":4, "category":null}] \N +12 {"person12":{"full_name":"Leo", "age":24, "gender":"Male"}} {"country":"USA", "city":"Portland", "population":650000} [{"item":"Guava", "quantity":3, "category":null}, {"item":"Lychee", "quantity":4, "category":null}] \N +13 {"person13":{"full_name":"Mona", "age":33, "gender":"Female"}} {"country":"USA", "city":"Denver", "population":700000} [{"item":"Avocado", "quantity":2, "category":"Fruit"}, {"item":"Tomato", "quantity":5, "category":"Vegetable"}] \N +13 {"person13":{"full_name":"Mona", "age":33, "gender":"Female"}} {"country":"USA", "city":"Denver", "population":700000} [{"item":"Avocado", "quantity":2, "category":"Fruit"}, {"item":"Tomato", "quantity":5, "category":"Vegetable"}] \N +14 {"person14":{"full_name":"Nina", "age":28, "gender":"Female"}} {"country":"USA", "city":"Miami", "population":450000} [{"item":"Cucumber", "quantity":6, "category":"Vegetable"}, {"item":"Carrot", "quantity":7, "category":"Vegetable"}] \N +14 {"person14":{"full_name":"Nina", "age":28, "gender":"Female"}} {"country":"USA", "city":"Miami", "population":450000} [{"item":"Cucumber", "quantity":6, "category":"Vegetable"}, {"item":"Carrot", "quantity":7, "category":"Vegetable"}] \N +15 {"person15":{"full_name":"Emma Smith", "age":30, "gender":"Female"}} {"country":"USA", "city":"New York", "population":8000000} [{"item":"Banana", "quantity":3, "category":"Fruit"}, {"item":"Potato", "quantity":8, "category":"Vegetable"}] \N +15 {"person15":{"full_name":"Emma Smith", "age":30, "gender":"Female"}} {"country":"USA", "city":"New York", "population":8000000} [{"item":"Banana", "quantity":3, "category":"Fruit"}, {"item":"Potato", "quantity":8, "category":"Vegetable"}] \N +8 {"person8":{"full_name":"Hank", "age":32, "gender":null}} 
{"country":"USA", "city":"San Diego", "population":1400000} [{"item":"Cherry", "quantity":5, "category":null}, {"item":"Plum", "quantity":3, "category":null}] \N +8 {"person8":{"full_name":"Hank", "age":32, "gender":null}} {"country":"USA", "city":"San Diego", "population":1400000} [{"item":"Cherry", "quantity":5, "category":null}, {"item":"Plum", "quantity":3, "category":null}] \N +9 {"person9":{"full_name":"Ivy", "age":29, "gender":null}} {"country":"USA", "city":"Dallas", "population":1300000} [{"item":"Peach", "quantity":4, "category":null}, {"item":"Apricot", "quantity":2, "category":null}] \N +9 {"person9":{"full_name":"Ivy", "age":29, "gender":null}} {"country":"USA", "city":"Dallas", "population":1300000} [{"item":"Peach", "quantity":4, "category":null}, {"item":"Apricot", "quantity":2, "category":null}] \N + +-- !quantity_not_null_cols -- +10 Austin Jack +10 Austin Jack +11 Seattle Karen +11 Seattle Karen +12 Portland Leo +12 Portland Leo +13 Denver Mona +13 Denver Mona +14 Miami Nina +14 Miami Nina +15 New York Emma Smith +15 New York Emma Smith +8 San Diego Hank +8 San Diego Hank +9 Dallas Ivy +9 Dallas Ivy + +-- !quantity_null -- +0 \N \N \N \N +0 {"person0":{"full_name":"zero", "age":2, "gender":null}} {"country":null, "city":"cn", "population":1000000} [{"item":"Apple", "quantity":null, "category":null}, {"item":"Banana", "quantity":null, "category":null}] \N +0 {"person0":{"full_name":"zero", "age":2, "gender":null}} {"country":null, "city":"cn", "population":1000000} [{"item":"Apple", "quantity":null, "category":null}, {"item":"Banana", "quantity":null, "category":null}] \N +1 \N \N \N \N +1 {"person1":{"full_name":"Alice", "age":25, "gender":null}} {"country":null, "city":"New York", "population":8000000} [{"item":"Apple", "quantity":null, "category":null}, {"item":"Banana", "quantity":null, "category":null}] \N +1 {"person1":{"full_name":"Alice", "age":25, "gender":null}} {"country":null, "city":"New York", "population":8000000} [{"item":"Apple", 
"quantity":null, "category":null}, {"item":"Banana", "quantity":null, "category":null}] \N +10 \N \N \N \N +11 \N \N \N \N +12 \N \N \N \N +13 \N \N \N \N +14 \N \N \N \N +15 \N \N \N \N +16 \N \N \N \N +17 \N \N \N \N +18 \N \N \N \N +19 \N \N \N \N +2 \N \N \N \N +2 {"person2":{"full_name":"Bob", "age":30, "gender":null}} {"country":null, "city":"Los Angeles", "population":4000000} [{"item":"Orange", "quantity":null, "category":null}, {"item":"Grape", "quantity":null, "category":null}] \N +2 {"person2":{"full_name":"Bob", "age":30, "gender":null}} {"country":null, "city":"Los Angeles", "population":4000000} [{"item":"Orange", "quantity":null, "category":null}, {"item":"Grape", "quantity":null, "category":null}] \N +20 \N \N \N \N +21 \N \N \N \N +3 \N \N \N \N +3 {"person3":{"full_name":"Charlie", "age":28, "gender":null}} {"country":null, "city":"Chicago", "population":2700000} [{"item":"Pear", "quantity":null, "category":null}, {"item":"Mango", "quantity":null, "category":null}] \N +3 {"person3":{"full_name":"Charlie", "age":28, "gender":null}} {"country":null, "city":"Chicago", "population":2700000} [{"item":"Pear", "quantity":null, "category":null}, {"item":"Mango", "quantity":null, "category":null}] \N +4 \N \N \N \N +4 {"person4":{"full_name":"David", "age":35, "gender":null}} {"country":null, "city":"Houston", "population":2300000} [{"item":"Kiwi", "quantity":null, "category":null}, {"item":"Pineapple", "quantity":null, "category":null}] \N +4 {"person4":{"full_name":"David", "age":35, "gender":null}} {"country":null, "city":"Houston", "population":2300000} [{"item":"Kiwi", "quantity":null, "category":null}, {"item":"Pineapple", "quantity":null, "category":null}] \N +5 \N \N \N \N +5 {"person5":{"full_name":"Eve", "age":40, "gender":null}} {"country":"USA", "city":"Phoenix", "population":1600000} [{"item":"Lemon", "quantity":null, "category":null}, {"item":"Lime", "quantity":null, "category":null}] \N +5 {"person5":{"full_name":"Eve", "age":40, 
"gender":null}} {"country":"USA", "city":"Phoenix", "population":1600000} [{"item":"Lemon", "quantity":null, "category":null}, {"item":"Lime", "quantity":null, "category":null}] \N +6 \N \N \N \N +6 {"person6":{"full_name":"Frank", "age":22, "gender":null}} {"country":"USA", "city":"Philadelphia", "population":1500000} [{"item":"Watermelon", "quantity":null, "category":null}, {"item":"Strawberry", "quantity":null, "category":null}] \N +6 {"person6":{"full_name":"Frank", "age":22, "gender":null}} {"country":"USA", "city":"Philadelphia", "population":1500000} [{"item":"Watermelon", "quantity":null, "category":null}, {"item":"Strawberry", "quantity":null, "category":null}] \N +7 \N \N \N \N +7 {"person7":{"full_name":"Grace", "age":27, "gender":null}} {"country":"USA", "city":"San Antonio", "population":1500000} [{"item":"Blueberry", "quantity":null, "category":null}, {"item":"Raspberry", "quantity":null, "category":null}] \N +7 {"person7":{"full_name":"Grace", "age":27, "gender":null}} {"country":"USA", "city":"San Antonio", "population":1500000} [{"item":"Blueberry", "quantity":null, "category":null}, {"item":"Raspberry", "quantity":null, "category":null}] \N +8 \N \N \N \N +9 \N \N \N \N + +-- !quantity_null_cols -- +0 \N \N +0 \N 2 +0 \N 2 +1 \N \N +1 \N 2 +1 \N 2 +10 \N \N +11 \N \N +12 \N \N +13 \N \N +14 \N \N +15 \N \N +16 \N \N +17 \N \N +18 \N \N +19 \N \N +2 \N \N +2 \N 2 +2 \N 2 +20 \N \N +21 \N \N +3 \N \N +3 \N 2 +3 \N 2 +4 \N \N +4 \N 2 +4 \N 2 +5 \N \N +5 USA 2 +5 USA 2 +6 \N \N +6 USA 2 +6 USA 2 +7 \N \N +7 USA 2 +7 USA 2 +8 \N \N +9 \N \N + +-- !struct2_not_null -- + +-- !struct2_not_null_cols -- + +-- !struct2_null -- +0 \N \N \N \N +0 {"person0":{"full_name":"zero", "age":2, "gender":null}} {"country":null, "city":"cn", "population":1000000} [{"item":"Apple", "quantity":null, "category":null}, {"item":"Banana", "quantity":null, "category":null}] \N +0 {"person0":{"full_name":"zero", "age":2, "gender":null}} {"country":null, "city":"cn", 
"population":1000000} [{"item":"Apple", "quantity":null, "category":null}, {"item":"Banana", "quantity":null, "category":null}] \N +1 \N \N \N \N +1 {"person1":{"full_name":"Alice", "age":25, "gender":null}} {"country":null, "city":"New York", "population":8000000} [{"item":"Apple", "quantity":null, "category":null}, {"item":"Banana", "quantity":null, "category":null}] \N +1 {"person1":{"full_name":"Alice", "age":25, "gender":null}} {"country":null, "city":"New York", "population":8000000} [{"item":"Apple", "quantity":null, "category":null}, {"item":"Banana", "quantity":null, "category":null}] \N +10 \N \N \N \N +10 {"person10":{"full_name":"Jack", "age":26, "gender":null}} {"country":"USA", "city":"Austin", "population":950000} [{"item":"Fig", "quantity":6, "category":null}, {"item":"Date", "quantity":7, "category":null}] \N +10 {"person10":{"full_name":"Jack", "age":26, "gender":null}} {"country":"USA", "city":"Austin", "population":950000} [{"item":"Fig", "quantity":6, "category":null}, {"item":"Date", "quantity":7, "category":null}] \N +11 \N \N \N \N +11 {"person11":{"full_name":"Karen", "age":31, "gender":"Female"}} {"country":"USA", "city":"Seattle", "population":750000} [{"item":"Coconut", "quantity":1, "category":null}, {"item":"Papaya", "quantity":2, "category":null}] \N +11 {"person11":{"full_name":"Karen", "age":31, "gender":"Female"}} {"country":"USA", "city":"Seattle", "population":750000} [{"item":"Coconut", "quantity":1, "category":null}, {"item":"Papaya", "quantity":2, "category":null}] \N +12 \N \N \N \N +12 {"person12":{"full_name":"Leo", "age":24, "gender":"Male"}} {"country":"USA", "city":"Portland", "population":650000} [{"item":"Guava", "quantity":3, "category":null}, {"item":"Lychee", "quantity":4, "category":null}] \N +12 {"person12":{"full_name":"Leo", "age":24, "gender":"Male"}} {"country":"USA", "city":"Portland", "population":650000} [{"item":"Guava", "quantity":3, "category":null}, {"item":"Lychee", "quantity":4, "category":null}] \N 
+13 \N \N \N \N +13 {"person13":{"full_name":"Mona", "age":33, "gender":"Female"}} {"country":"USA", "city":"Denver", "population":700000} [{"item":"Avocado", "quantity":2, "category":"Fruit"}, {"item":"Tomato", "quantity":5, "category":"Vegetable"}] \N +13 {"person13":{"full_name":"Mona", "age":33, "gender":"Female"}} {"country":"USA", "city":"Denver", "population":700000} [{"item":"Avocado", "quantity":2, "category":"Fruit"}, {"item":"Tomato", "quantity":5, "category":"Vegetable"}] \N +14 \N \N \N \N +14 {"person14":{"full_name":"Nina", "age":28, "gender":"Female"}} {"country":"USA", "city":"Miami", "population":450000} [{"item":"Cucumber", "quantity":6, "category":"Vegetable"}, {"item":"Carrot", "quantity":7, "category":"Vegetable"}] \N +14 {"person14":{"full_name":"Nina", "age":28, "gender":"Female"}} {"country":"USA", "city":"Miami", "population":450000} [{"item":"Cucumber", "quantity":6, "category":"Vegetable"}, {"item":"Carrot", "quantity":7, "category":"Vegetable"}] \N +15 \N \N \N \N +15 {"person15":{"full_name":"Emma Smith", "age":30, "gender":"Female"}} {"country":"USA", "city":"New York", "population":8000000} [{"item":"Banana", "quantity":3, "category":"Fruit"}, {"item":"Potato", "quantity":8, "category":"Vegetable"}] \N +15 {"person15":{"full_name":"Emma Smith", "age":30, "gender":"Female"}} {"country":"USA", "city":"New York", "population":8000000} [{"item":"Banana", "quantity":3, "category":"Fruit"}, {"item":"Potato", "quantity":8, "category":"Vegetable"}] \N +16 \N \N \N \N +17 \N \N \N \N +18 \N \N \N \N +19 \N \N \N \N +2 \N \N \N \N +2 {"person2":{"full_name":"Bob", "age":30, "gender":null}} {"country":null, "city":"Los Angeles", "population":4000000} [{"item":"Orange", "quantity":null, "category":null}, {"item":"Grape", "quantity":null, "category":null}] \N +2 {"person2":{"full_name":"Bob", "age":30, "gender":null}} {"country":null, "city":"Los Angeles", "population":4000000} [{"item":"Orange", "quantity":null, "category":null}, 
{"item":"Grape", "quantity":null, "category":null}] \N +20 \N \N \N \N +21 \N \N \N \N +3 \N \N \N \N +3 {"person3":{"full_name":"Charlie", "age":28, "gender":null}} {"country":null, "city":"Chicago", "population":2700000} [{"item":"Pear", "quantity":null, "category":null}, {"item":"Mango", "quantity":null, "category":null}] \N +3 {"person3":{"full_name":"Charlie", "age":28, "gender":null}} {"country":null, "city":"Chicago", "population":2700000} [{"item":"Pear", "quantity":null, "category":null}, {"item":"Mango", "quantity":null, "category":null}] \N +4 \N \N \N \N +4 {"person4":{"full_name":"David", "age":35, "gender":null}} {"country":null, "city":"Houston", "population":2300000} [{"item":"Kiwi", "quantity":null, "category":null}, {"item":"Pineapple", "quantity":null, "category":null}] \N +4 {"person4":{"full_name":"David", "age":35, "gender":null}} {"country":null, "city":"Houston", "population":2300000} [{"item":"Kiwi", "quantity":null, "category":null}, {"item":"Pineapple", "quantity":null, "category":null}] \N +5 \N \N \N \N +5 {"person5":{"full_name":"Eve", "age":40, "gender":null}} {"country":"USA", "city":"Phoenix", "population":1600000} [{"item":"Lemon", "quantity":null, "category":null}, {"item":"Lime", "quantity":null, "category":null}] \N +5 {"person5":{"full_name":"Eve", "age":40, "gender":null}} {"country":"USA", "city":"Phoenix", "population":1600000} [{"item":"Lemon", "quantity":null, "category":null}, {"item":"Lime", "quantity":null, "category":null}] \N +6 \N \N \N \N +6 {"person6":{"full_name":"Frank", "age":22, "gender":null}} {"country":"USA", "city":"Philadelphia", "population":1500000} [{"item":"Watermelon", "quantity":null, "category":null}, {"item":"Strawberry", "quantity":null, "category":null}] \N +6 {"person6":{"full_name":"Frank", "age":22, "gender":null}} {"country":"USA", "city":"Philadelphia", "population":1500000} [{"item":"Watermelon", "quantity":null, "category":null}, {"item":"Strawberry", "quantity":null, "category":null}] \N 
+7 \N \N \N \N +7 {"person7":{"full_name":"Grace", "age":27, "gender":null}} {"country":"USA", "city":"San Antonio", "population":1500000} [{"item":"Blueberry", "quantity":null, "category":null}, {"item":"Raspberry", "quantity":null, "category":null}] \N +7 {"person7":{"full_name":"Grace", "age":27, "gender":null}} {"country":"USA", "city":"San Antonio", "population":1500000} [{"item":"Blueberry", "quantity":null, "category":null}, {"item":"Raspberry", "quantity":null, "category":null}] \N +8 \N \N \N \N +8 {"person8":{"full_name":"Hank", "age":32, "gender":null}} {"country":"USA", "city":"San Diego", "population":1400000} [{"item":"Cherry", "quantity":5, "category":null}, {"item":"Plum", "quantity":3, "category":null}] \N +8 {"person8":{"full_name":"Hank", "age":32, "gender":null}} {"country":"USA", "city":"San Diego", "population":1400000} [{"item":"Cherry", "quantity":5, "category":null}, {"item":"Plum", "quantity":3, "category":null}] \N +9 \N \N \N \N +9 {"person9":{"full_name":"Ivy", "age":29, "gender":null}} {"country":"USA", "city":"Dallas", "population":1300000} [{"item":"Peach", "quantity":4, "category":null}, {"item":"Apricot", "quantity":2, "category":null}] \N +9 {"person9":{"full_name":"Ivy", "age":29, "gender":null}} {"country":"USA", "city":"Dallas", "population":1300000} [{"item":"Peach", "quantity":4, "category":null}, {"item":"Apricot", "quantity":2, "category":null}] \N + +-- !struct2_null_cols -- +0 \N +0 cn +0 cn +1 \N +1 New York +1 New York +10 \N +10 Austin +10 Austin +11 \N +11 Seattle +11 Seattle +12 \N +12 Portland +12 Portland +13 \N +13 Denver +13 Denver +14 \N +14 Miami +14 Miami +15 \N +15 New York +15 New York +16 \N +17 \N +18 \N +19 \N +2 \N +2 Los Angeles +2 Los Angeles +20 \N +21 \N +3 \N +3 Chicago +3 Chicago +4 \N +4 Houston +4 Houston +5 \N +5 Phoenix +5 Phoenix +6 \N +6 Philadelphia +6 Philadelphia +7 \N +7 San Antonio +7 San Antonio +8 \N +8 San Diego +8 San Diego +9 \N +9 Dallas +9 Dallas + +-- !cc_nested -- + +-- 
!cc_nested_cols -- + +-- !c_over_20 -- + +-- !c_over_20_cols -- + +-- !new_aa_50 -- + +-- !new_aa_50_cols -- + +-- !gender_female -- +11 {"person11":{"full_name":"Karen", "age":31, "gender":"Female"}} {"country":"USA", "city":"Seattle", "population":750000} [{"item":"Coconut", "quantity":1, "category":null}, {"item":"Papaya", "quantity":2, "category":null}] \N +11 {"person11":{"full_name":"Karen", "age":31, "gender":"Female"}} {"country":"USA", "city":"Seattle", "population":750000} [{"item":"Coconut", "quantity":1, "category":null}, {"item":"Papaya", "quantity":2, "category":null}] \N +13 {"person13":{"full_name":"Mona", "age":33, "gender":"Female"}} {"country":"USA", "city":"Denver", "population":700000} [{"item":"Avocado", "quantity":2, "category":"Fruit"}, {"item":"Tomato", "quantity":5, "category":"Vegetable"}] \N +13 {"person13":{"full_name":"Mona", "age":33, "gender":"Female"}} {"country":"USA", "city":"Denver", "population":700000} [{"item":"Avocado", "quantity":2, "category":"Fruit"}, {"item":"Tomato", "quantity":5, "category":"Vegetable"}] \N +14 {"person14":{"full_name":"Nina", "age":28, "gender":"Female"}} {"country":"USA", "city":"Miami", "population":450000} [{"item":"Cucumber", "quantity":6, "category":"Vegetable"}, {"item":"Carrot", "quantity":7, "category":"Vegetable"}] \N +14 {"person14":{"full_name":"Nina", "age":28, "gender":"Female"}} {"country":"USA", "city":"Miami", "population":450000} [{"item":"Cucumber", "quantity":6, "category":"Vegetable"}, {"item":"Carrot", "quantity":7, "category":"Vegetable"}] \N +15 {"person15":{"full_name":"Emma Smith", "age":30, "gender":"Female"}} {"country":"USA", "city":"New York", "population":8000000} [{"item":"Banana", "quantity":3, "category":"Fruit"}, {"item":"Potato", "quantity":8, "category":"Vegetable"}] \N +15 {"person15":{"full_name":"Emma Smith", "age":30, "gender":"Female"}} {"country":"USA", "city":"New York", "population":8000000} [{"item":"Banana", "quantity":3, "category":"Fruit"}, 
{"item":"Potato", "quantity":8, "category":"Vegetable"}] \N + +-- !gender_female_cols -- +11 Seattle 2 +11 Seattle 2 +13 Denver 2 +13 Denver 2 +14 Miami 2 +14 Miami 2 +15 New York 2 +15 New York 2 + +-- !category_fruit -- + +-- !category_fruit_cols -- + +-- !category_vegetable -- +13 {"person13":{"full_name":"Mona", "age":33, "gender":"Female"}} {"country":"USA", "city":"Denver", "population":700000} [{"item":"Avocado", "quantity":2, "category":"Fruit"}, {"item":"Tomato", "quantity":5, "category":"Vegetable"}] \N +13 {"person13":{"full_name":"Mona", "age":33, "gender":"Female"}} {"country":"USA", "city":"Denver", "population":700000} [{"item":"Avocado", "quantity":2, "category":"Fruit"}, {"item":"Tomato", "quantity":5, "category":"Vegetable"}] \N +14 {"person14":{"full_name":"Nina", "age":28, "gender":"Female"}} {"country":"USA", "city":"Miami", "population":450000} [{"item":"Cucumber", "quantity":6, "category":"Vegetable"}, {"item":"Carrot", "quantity":7, "category":"Vegetable"}] \N +14 {"person14":{"full_name":"Nina", "age":28, "gender":"Female"}} {"country":"USA", "city":"Miami", "population":450000} [{"item":"Cucumber", "quantity":6, "category":"Vegetable"}, {"item":"Carrot", "quantity":7, "category":"Vegetable"}] \N +15 {"person15":{"full_name":"Emma Smith", "age":30, "gender":"Female"}} {"country":"USA", "city":"New York", "population":8000000} [{"item":"Banana", "quantity":3, "category":"Fruit"}, {"item":"Potato", "quantity":8, "category":"Vegetable"}] \N +15 {"person15":{"full_name":"Emma Smith", "age":30, "gender":"Female"}} {"country":"USA", "city":"New York", "population":8000000} [{"item":"Banana", "quantity":3, "category":"Fruit"}, {"item":"Potato", "quantity":8, "category":"Vegetable"}] \N + +-- !category_vegetable_cols -- +13 Denver 33 +13 Denver 33 +14 Miami 28 +14 Miami 28 +15 New York 30 +15 New York 30 + diff --git a/regression-test/data/external_table_p0/hudi/test_hudi_incremental.out 
b/regression-test/data/external_table_p0/hudi/test_hudi_incremental.out new file mode 100644 index 00000000000000..962ce78a7c9f25 --- /dev/null +++ b/regression-test/data/external_table_p0/hudi/test_hudi_incremental.out @@ -0,0 +1,343 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !incremental_1_end -- +8 + +-- !incremental_1_latest -- +8 + +-- !incremental_earliest_1 -- +2 + +-- !incremental_2_end -- +6 + +-- !incremental_2_latest -- +6 + +-- !incremental_earliest_2 -- +4 + +-- !incremental_1_2 -- +2 + +-- !incremental_3_end -- +4 + +-- !incremental_3_latest -- +4 + +-- !incremental_earliest_3 -- +6 + +-- !incremental_2_3 -- +2 + +-- !incremental_4_end -- +2 + +-- !incremental_4_latest -- +2 + +-- !incremental_earliest_4 -- +8 + +-- !incremental_3_4 -- +2 + +-- !incremental_5_end -- +0 + +-- !incremental_5_latest -- +0 + +-- !incremental_earliest_5 -- +10 + +-- !incremental_4_5 -- +2 + +-- !incremental_1_end -- +8 + +-- !incremental_1_latest -- +8 + +-- !incremental_earliest_1 -- +2 + +-- !incremental_2_end -- +6 + +-- !incremental_2_latest -- +6 + +-- !incremental_earliest_2 -- +4 + +-- !incremental_1_2 -- +2 + +-- !incremental_3_end -- +4 + +-- !incremental_3_latest -- +4 + +-- !incremental_earliest_3 -- +6 + +-- !incremental_2_3 -- +2 + +-- !incremental_4_end -- +2 + +-- !incremental_4_latest -- +2 + +-- !incremental_earliest_4 -- +8 + +-- !incremental_3_4 -- +2 + +-- !incremental_5_end -- +0 + +-- !incremental_5_latest -- +0 + +-- !incremental_earliest_5 -- +10 + +-- !incremental_4_5 -- +2 + +-- !incremental_1_end -- +8 + +-- !incremental_1_latest -- +8 + +-- !incremental_earliest_1 -- +2 + +-- !incremental_2_end -- +6 + +-- !incremental_2_latest -- +6 + +-- !incremental_earliest_2 -- +4 + +-- !incremental_1_2 -- +2 + +-- !incremental_3_end -- +4 + +-- !incremental_3_latest -- +4 + +-- !incremental_earliest_3 -- +6 + +-- !incremental_2_3 -- +2 + +-- !incremental_4_end -- +2 + +-- !incremental_4_latest -- 
+2 + +-- !incremental_earliest_4 -- +8 + +-- !incremental_3_4 -- +2 + +-- !incremental_5_end -- +0 + +-- !incremental_5_latest -- +0 + +-- !incremental_earliest_5 -- +10 + +-- !incremental_4_5 -- +2 + +-- !incremental_1_end -- +8 + +-- !incremental_1_latest -- +8 + +-- !incremental_earliest_1 -- +2 + +-- !incremental_2_end -- +6 + +-- !incremental_2_latest -- +6 + +-- !incremental_earliest_2 -- +4 + +-- !incremental_1_2 -- +2 + +-- !incremental_3_end -- +4 + +-- !incremental_3_latest -- +4 + +-- !incremental_earliest_3 -- +6 + +-- !incremental_2_3 -- +2 + +-- !incremental_4_end -- +2 + +-- !incremental_4_latest -- +2 + +-- !incremental_earliest_4 -- +8 + +-- !incremental_3_4 -- +2 + +-- !incremental_5_end -- +0 + +-- !incremental_5_latest -- +0 + +-- !incremental_earliest_5 -- +10 + +-- !incremental_4_5 -- +2 + +-- !incremental_1_end -- +8 + +-- !incremental_1_latest -- +8 + +-- !incremental_earliest_1 -- +2 + +-- !incremental_2_end -- +6 + +-- !incremental_2_latest -- +6 + +-- !incremental_earliest_2 -- +4 + +-- !incremental_1_2 -- +2 + +-- !incremental_3_end -- +4 + +-- !incremental_3_latest -- +4 + +-- !incremental_earliest_3 -- +6 + +-- !incremental_2_3 -- +2 + +-- !incremental_4_end -- +2 + +-- !incremental_4_latest -- +2 + +-- !incremental_earliest_4 -- +8 + +-- !incremental_3_4 -- +2 + +-- !incremental_5_end -- +0 + +-- !incremental_5_latest -- +0 + +-- !incremental_earliest_5 -- +10 + +-- !incremental_4_5 -- +2 + +-- !incremental_1_end -- +8 + +-- !incremental_1_latest -- +8 + +-- !incremental_earliest_1 -- +2 + +-- !incremental_2_end -- +6 + +-- !incremental_2_latest -- +6 + +-- !incremental_earliest_2 -- +4 + +-- !incremental_1_2 -- +2 + +-- !incremental_3_end -- +4 + +-- !incremental_3_latest -- +4 + +-- !incremental_earliest_3 -- +6 + +-- !incremental_2_3 -- +2 + +-- !incremental_4_end -- +2 + +-- !incremental_4_latest -- +2 + +-- !incremental_earliest_4 -- +8 + +-- !incremental_3_4 -- +2 + +-- !incremental_5_end -- +0 + +-- !incremental_5_latest -- +0 
+ +-- !incremental_earliest_5 -- +10 + +-- !incremental_4_5 -- +2 + diff --git a/regression-test/data/external_table_p0/hudi/test_hudi_meta.out b/regression-test/data/external_table_p0/hudi/test_hudi_meta.out new file mode 100644 index 00000000000000..95a7f56a31e6e4 --- /dev/null +++ b/regression-test/data/external_table_p0/hudi/test_hudi_meta.out @@ -0,0 +1,35 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !hudi_meta1 -- +commit COMPLETED +commit COMPLETED +commit COMPLETED +commit COMPLETED +commit COMPLETED + +-- !hudi_meta2 -- +deltacommit COMPLETED +deltacommit COMPLETED +deltacommit COMPLETED +deltacommit COMPLETED +deltacommit COMPLETED + +-- !hudi_meta3 -- +commit COMPLETED +commit COMPLETED +commit COMPLETED +commit COMPLETED +commit COMPLETED + +-- !hudi_meta4 -- +commit COMPLETED +commit COMPLETED +commit COMPLETED +commit COMPLETED +commit COMPLETED + +-- !hudi_meta5 -- +commit COMPLETED + +-- !hudi_meta6 -- +deltacommit COMPLETED + diff --git a/regression-test/data/external_table_p0/hudi/test_hudi_mtmv.out b/regression-test/data/external_table_p0/hudi/test_hudi_mtmv.out new file mode 100644 index 00000000000000..3be3c4dff7fac7 --- /dev/null +++ b/regression-test/data/external_table_p0/hudi/test_hudi_mtmv.out @@ -0,0 +1,101 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !base_table -- +1 25 a +10 38 b +2 30 a +3 28 a +4 35 a +5 22 a +6 40 b +7 27 b +8 32 b +9 29 b + +-- !refresh_one_partition -- +1 25 a +2 30 a +3 28 a +4 35 a +5 22 a + +-- !refresh_auto -- +1 25 a +10 38 b +2 30 a +3 28 a +4 35 a +5 22 a +6 40 b +7 27 b +8 32 b +9 29 b + +-- !is_sync_before_rebuild -- +true + +-- !is_sync_after_rebuild -- +true + +-- !refresh_complete_rebuild -- +1 25 a +10 38 b +2 30 a +3 28 a +4 35 a +5 22 a +6 40 b +7 27 b +8 32 b +9 29 b + +-- !not_partition_before -- +false + +-- !not_partition -- +1 25 a +10 38 b +2 30 a +3 28 a +4 35 a +5 22 a +6 40 b +7 27 b +8 32 b +9 29 b + +-- !not_partition_after -- +true + +-- !join_one_partition -- +1 25 a 1 2 +2 30 a \N \N +3 28 a \N \N +4 35 a \N \N +5 22 a \N \N + +-- !two_partition -- +1 name1 100 2020-01-01 +10 name10 1000 2038-01-02 +2 name2 200 2020-01-01 +3 name3 300 2020-01-01 +4 name4 400 2038-01-01 +5 name5 500 2038-01-01 +6 name6 600 2038-01-01 +7 name7 700 2038-01-01 +8 name8 800 2038-01-02 +9 name9 900 2038-01-02 + +-- !limit_partition -- +10 name10 1000 2038-01-02 +4 name4 400 2038-01-01 +5 name5 500 2038-01-01 +6 name6 600 2038-01-01 +7 name7 700 2038-01-01 +8 name8 800 2038-01-02 +9 name9 900 2038-01-02 + +-- !null_partition -- +4 name4 400 bj +5 name5 500 bj +6 name6 600 bj + diff --git a/regression-test/data/external_table_p0/hudi/test_hudi_olap_rewrite_mtmv.out b/regression-test/data/external_table_p0/hudi/test_hudi_olap_rewrite_mtmv.out new file mode 100644 index 00000000000000..06fe1032e33ca0 --- /dev/null +++ b/regression-test/data/external_table_p0/hudi/test_hudi_olap_rewrite_mtmv.out @@ -0,0 +1,44 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !refresh_one_partition -- +1 25 a 1 2 +2 30 a \N \N +3 28 a \N \N +4 35 a \N \N +5 22 a \N \N + +-- !refresh_one_partition_rewrite -- +1 25 a 1 2 +10 38 b \N \N +2 30 a \N \N +3 28 a \N \N +4 35 a \N \N +5 22 a \N \N +6 40 b \N \N +7 27 b \N \N +8 32 b \N \N +9 29 b \N \N + +-- !refresh_auto -- +1 25 a 1 2 +10 38 b \N \N +2 30 a \N \N +3 28 a \N \N +4 35 a \N \N +5 22 a \N \N +6 40 b \N \N +7 27 b \N \N +8 32 b \N \N +9 29 b \N \N + +-- !refresh_all_partition_rewrite -- +1 25 a 1 2 +10 38 b \N \N +2 30 a \N \N +3 28 a \N \N +4 35 a \N \N +5 22 a \N \N +6 40 b \N \N +7 27 b \N \N +8 32 b \N \N +9 29 b \N \N + diff --git a/regression-test/data/external_table_p0/hudi/test_hudi_orc_tables.out b/regression-test/data/external_table_p0/hudi/test_hudi_orc_tables.out new file mode 100644 index 00000000000000..3096365d32d347 --- /dev/null +++ b/regression-test/data/external_table_p0/hudi/test_hudi_orc_tables.out @@ -0,0 +1,9 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !cow -- +1 test1 10.5 +2 test2 20.5 + +-- !mor -- +1 test1 10.5 +2 test2 20.5 + diff --git a/regression-test/data/external_table_p0/hudi/test_hudi_partition_prune.out b/regression-test/data/external_table_p0/hudi/test_hudi_partition_prune.out new file mode 100644 index 00000000000000..67eaccc9622efc --- /dev/null +++ b/regression-test/data/external_table_p0/hudi/test_hudi_partition_prune.out @@ -0,0 +1,385 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !one_partition_1_1 -- +1 Alice 2024 +2 Bob 2024 +3 Charlie 2024 + +-- !one_partition_2_1 -- +4 David 2025 +5 Eva 2025 + +-- !one_partition_3_all -- +1 Alice 2024 +2 Bob 2024 +3 Charlie 2024 +4 David 2025 +5 Eva 2025 + +-- !one_partition_4_all -- +5 Eva 2025 + +-- !one_partition_5_1 -- +3 Charlie 2024 + +-- !two_partition_1_1 -- +1 Alice US 1 +2 Bob US 1 +3 Charlie US 1 + +-- !two_partition_2_1 -- +8 Hannah EU 2 +9 Ivy EU 2 +10 Jack EU 2 + +-- !two_partition_3_2 -- +1 Alice US 1 +2 Bob US 1 +3 Charlie US 1 +4 David US 2 +5 Eva US 2 + +-- !two_partition_4_all -- +1 Alice US 1 +2 Bob US 1 +3 Charlie US 1 +4 David US 2 +5 Eva US 2 +6 Frank EU 1 +7 Grace EU 1 +8 Hannah EU 2 +9 Ivy EU 2 +10 Jack EU 2 + +-- !two_partition_5_1 -- + +-- !two_partition_6_1 -- +8 Hannah EU 2 +9 Ivy EU 2 +10 Jack EU 2 + +-- !three_partition_1_1 -- +1 Alice US 2024 Q1 +2 Bob US 2024 Q1 +3 Charlie US 2024 Q1 + +-- !three_partition_2_1 -- +10 Jack EU 2025 Q2 +11 Leo EU 2025 Q2 + +-- !three_partition_3_3 -- +13 Nina AS 2025 Q1 +14 Oscar AS 2025 Q2 +15 Paul AS 2025 Q3 + +-- !three_partition_4_2 -- +1 Alice US 2024 Q1 +2 Bob US 2024 Q1 +3 Charlie US 2024 Q1 +6 Frank US 2025 Q1 + +-- !three_partition_5_all -- +1 Alice US 2024 Q1 +2 Bob US 2024 Q1 +3 Charlie US 2024 Q1 +4 David US 2024 Q2 +5 Eva US 2024 Q2 +6 Frank US 2025 Q1 +7 Grace US 2025 Q2 +8 Hannah EU 2024 Q1 +9 Ivy EU 2024 Q1 +10 Jack EU 2025 Q2 +11 Leo EU 2025 Q2 +12 Mia EU 2025 Q3 +13 Nina AS 2025 Q1 +14 Oscar AS 2025 Q2 +15 Paul AS 2025 Q3 + +-- !three_partition_6_1 -- +8 Hannah EU 2024 Q1 +9 Ivy EU 2024 Q1 + +-- !three_partition_7_7 -- +6 Frank US 2025 Q1 +7 Grace US 2025 Q2 +10 Jack EU 2025 Q2 +11 Leo EU 2025 Q2 +12 Mia EU 2025 Q3 +13 Nina AS 2025 Q1 +14 Oscar AS 2025 Q2 +15 Paul AS 2025 Q3 + +-- !three_partition_8_2 -- +7 Grace US 2025 Q2 + +-- !one_partition_6_0 -- + +-- !two_partition_7_0 -- + +-- !two_partition_8_0 -- + +-- !three_partition_9_0 -- + +-- !three_partition_10_0 -- 
+ +-- !three_partition_11_0 -- + +-- !time_travel_two_partition_1_4 -- +1 Alice US 1 +2 Bob US 1 +3 Charlie US 1 +4 David US 2 +5 Eva US 2 +6 Frank EU 1 +7 Grace EU 1 +8 Hannah EU 2 +9 Ivy EU 2 +10 Jack EU 2 + +-- !time_travel_two_partition_2_2 -- +1 Alice US 1 +2 Bob US 1 +3 Charlie US 1 +4 David US 2 +5 Eva US 2 + +-- !time_travel_two_partition_3_2 -- +4 David US 2 +5 Eva US 2 +8 Hannah EU 2 +9 Ivy EU 2 +10 Jack EU 2 + +-- !time_travel_two_partition_4_0 -- + +-- !time_travel_two_partition_5_1 -- +1 Alice US 1 + +-- !time_travel_two_partition_6_1 -- +1 Alice US 1 +2 Bob US 1 +3 Charlie US 1 + +-- !one_partition_boolean -- +1 Alice true +2 Bob true + +-- !one_partition_tinyint -- +1 Alice 1 +2 Bob 1 + +-- !one_partition_smallint -- +1 Alice 10 +2 Bob 10 + +-- !one_partition_int -- +1 Alice 100 +2 Bob 100 + +-- !one_partition_bigint -- +1 Alice 1234567890 +2 Bob 1234567890 + +-- !one_partition_string -- +1 Alice RegionA +2 Bob RegionA + +-- !one_partition_date -- +1 Alice 2023-12-01 +2 Bob 2023-12-01 + +-- !one_partition_timestamp -- +1 Alice 2023-12-01T08:00 +2 Bob 2023-12-01T08:00 + +-- !one_partition_1_1 -- +1 Alice 2024 +2 Bob 2024 +3 Charlie 2024 + +-- !one_partition_2_1 -- +4 David 2025 +5 Eva 2025 + +-- !one_partition_3_all -- +1 Alice 2024 +2 Bob 2024 +3 Charlie 2024 +4 David 2025 +5 Eva 2025 + +-- !one_partition_4_all -- +5 Eva 2025 + +-- !one_partition_5_1 -- +3 Charlie 2024 + +-- !two_partition_1_1 -- +1 Alice US 1 +2 Bob US 1 +3 Charlie US 1 + +-- !two_partition_2_1 -- +8 Hannah EU 2 +9 Ivy EU 2 +10 Jack EU 2 + +-- !two_partition_3_2 -- +1 Alice US 1 +2 Bob US 1 +3 Charlie US 1 +4 David US 2 +5 Eva US 2 + +-- !two_partition_4_all -- +1 Alice US 1 +2 Bob US 1 +3 Charlie US 1 +4 David US 2 +5 Eva US 2 +6 Frank EU 1 +7 Grace EU 1 +8 Hannah EU 2 +9 Ivy EU 2 +10 Jack EU 2 + +-- !two_partition_5_1 -- + +-- !two_partition_6_1 -- +8 Hannah EU 2 +9 Ivy EU 2 +10 Jack EU 2 + +-- !three_partition_1_1 -- +1 Alice US 2024 Q1 +2 Bob US 2024 Q1 +3 Charlie US 2024 Q1 + 
+-- !three_partition_2_1 -- +10 Jack EU 2025 Q2 +11 Leo EU 2025 Q2 + +-- !three_partition_3_3 -- +13 Nina AS 2025 Q1 +14 Oscar AS 2025 Q2 +15 Paul AS 2025 Q3 + +-- !three_partition_4_2 -- +1 Alice US 2024 Q1 +2 Bob US 2024 Q1 +3 Charlie US 2024 Q1 +6 Frank US 2025 Q1 + +-- !three_partition_5_all -- +1 Alice US 2024 Q1 +2 Bob US 2024 Q1 +3 Charlie US 2024 Q1 +4 David US 2024 Q2 +5 Eva US 2024 Q2 +6 Frank US 2025 Q1 +7 Grace US 2025 Q2 +8 Hannah EU 2024 Q1 +9 Ivy EU 2024 Q1 +10 Jack EU 2025 Q2 +11 Leo EU 2025 Q2 +12 Mia EU 2025 Q3 +13 Nina AS 2025 Q1 +14 Oscar AS 2025 Q2 +15 Paul AS 2025 Q3 + +-- !three_partition_6_1 -- +8 Hannah EU 2024 Q1 +9 Ivy EU 2024 Q1 + +-- !three_partition_7_7 -- +6 Frank US 2025 Q1 +7 Grace US 2025 Q2 +10 Jack EU 2025 Q2 +11 Leo EU 2025 Q2 +12 Mia EU 2025 Q3 +13 Nina AS 2025 Q1 +14 Oscar AS 2025 Q2 +15 Paul AS 2025 Q3 + +-- !three_partition_8_2 -- +7 Grace US 2025 Q2 + +-- !one_partition_6_0 -- + +-- !two_partition_7_0 -- + +-- !two_partition_8_0 -- + +-- !three_partition_9_0 -- + +-- !three_partition_10_0 -- + +-- !three_partition_11_0 -- + +-- !time_travel_two_partition_1_4 -- +1 Alice US 1 +2 Bob US 1 +3 Charlie US 1 +4 David US 2 +5 Eva US 2 +6 Frank EU 1 +7 Grace EU 1 +8 Hannah EU 2 +9 Ivy EU 2 +10 Jack EU 2 + +-- !time_travel_two_partition_2_2 -- +1 Alice US 1 +2 Bob US 1 +3 Charlie US 1 +4 David US 2 +5 Eva US 2 + +-- !time_travel_two_partition_3_2 -- +4 David US 2 +5 Eva US 2 +8 Hannah EU 2 +9 Ivy EU 2 +10 Jack EU 2 + +-- !time_travel_two_partition_4_0 -- + +-- !time_travel_two_partition_5_1 -- +1 Alice US 1 + +-- !time_travel_two_partition_6_1 -- +1 Alice US 1 +2 Bob US 1 +3 Charlie US 1 + +-- !one_partition_boolean -- +1 Alice true +2 Bob true + +-- !one_partition_tinyint -- +1 Alice 1 +2 Bob 1 + +-- !one_partition_smallint -- +1 Alice 10 +2 Bob 10 + +-- !one_partition_int -- +1 Alice 100 +2 Bob 100 + +-- !one_partition_bigint -- +1 Alice 1234567890 +2 Bob 1234567890 + +-- !one_partition_string -- +1 Alice RegionA +2 Bob RegionA + 
+-- !one_partition_date -- +1 Alice 2023-12-01 +2 Bob 2023-12-01 + +-- !one_partition_timestamp -- +1 Alice 2023-12-01T08:00 +2 Bob 2023-12-01T08:00 + diff --git a/regression-test/data/external_table_p0/hudi/test_hudi_rewrite_mtmv.out b/regression-test/data/external_table_p0/hudi/test_hudi_rewrite_mtmv.out new file mode 100644 index 00000000000000..77597631587735 --- /dev/null +++ b/regression-test/data/external_table_p0/hudi/test_hudi_rewrite_mtmv.out @@ -0,0 +1,16 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !refresh_one_partition -- +a 5 + +-- !refresh_one_partition_rewrite -- +a 5 +b 5 + +-- !refresh_auto -- +a 5 +b 5 + +-- !refresh_all_partition_rewrite -- +a 5 +b 5 + diff --git a/regression-test/data/external_table_p0/hudi/test_hudi_runtime_filter_partition_pruning.out b/regression-test/data/external_table_p0/hudi/test_hudi_runtime_filter_partition_pruning.out new file mode 100644 index 00000000000000..abf14662aba9ad --- /dev/null +++ b/regression-test/data/external_table_p0/hudi/test_hudi_runtime_filter_partition_pruning.out @@ -0,0 +1,115 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !runtime_filter_partition_pruning_boolean_1 -- +2 + +-- !runtime_filter_partition_pruning_boolean_2 -- +4 + +-- !runtime_filter_partition_pruning_tinyint_1 -- +2 + +-- !runtime_filter_partition_pruning_tinyint_2 -- +4 + +-- !runtime_filter_partition_pruning_smallint_1 -- +2 + +-- !runtime_filter_partition_pruning_smallint_2 -- +4 + +-- !runtime_filter_partition_pruning_int_1 -- +2 + +-- !runtime_filter_partition_pruning_int_2 -- +4 + +-- !runtime_filter_partition_pruning_int_3 -- +2 + +-- !runtime_filter_partition_pruning_bigint_1 -- +2 + +-- !runtime_filter_partition_pruning_bigint_2 -- +4 + +-- !runtime_filter_partition_pruning_string_1 -- +2 + +-- !runtime_filter_partition_pruning_string_2 -- +4 + +-- !runtime_filter_partition_pruning_date_1 -- +2 + +-- !runtime_filter_partition_pruning_date_2 -- +4 + +-- !runtime_filter_partition_pruning_timestamp_1 -- +2 + +-- !runtime_filter_partition_pruning_timestamp_2 -- +4 + +-- !runtime_filter_partition_pruning_complex_1 -- +7 + +-- !runtime_filter_partition_pruning_complex_2 -- +5 + +-- !runtime_filter_partition_pruning_boolean_1 -- +2 + +-- !runtime_filter_partition_pruning_boolean_2 -- +4 + +-- !runtime_filter_partition_pruning_tinyint_1 -- +2 + +-- !runtime_filter_partition_pruning_tinyint_2 -- +4 + +-- !runtime_filter_partition_pruning_smallint_1 -- +2 + +-- !runtime_filter_partition_pruning_smallint_2 -- +4 + +-- !runtime_filter_partition_pruning_int_1 -- +2 + +-- !runtime_filter_partition_pruning_int_2 -- +4 + +-- !runtime_filter_partition_pruning_int_3 -- +2 + +-- !runtime_filter_partition_pruning_bigint_1 -- +2 + +-- !runtime_filter_partition_pruning_bigint_2 -- +4 + +-- !runtime_filter_partition_pruning_string_1 -- +2 + +-- !runtime_filter_partition_pruning_string_2 -- +4 + +-- !runtime_filter_partition_pruning_date_1 -- +2 + +-- !runtime_filter_partition_pruning_date_2 -- +4 + +-- !runtime_filter_partition_pruning_timestamp_1 -- +2 + +-- 
!runtime_filter_partition_pruning_timestamp_2 -- +4 + +-- !runtime_filter_partition_pruning_complex_1 -- +7 + +-- !runtime_filter_partition_pruning_complex_2 -- +5 + diff --git a/regression-test/data/external_table_p0/hudi/test_hudi_schema_change.out b/regression-test/data/external_table_p0/hudi/test_hudi_schema_change.out new file mode 100644 index 00000000000000..63b339843921f5 --- /dev/null +++ b/regression-test/data/external_table_p0/hudi/test_hudi_schema_change.out @@ -0,0 +1,27 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !hudi_0 -- +1 Alice 25 \N \N +2 Bob 30 \N \N +3 Charlie 28 New York \N +4 David 35 Los Angeles \N +5 Eve 28 Chicago \N +6 Frank 32 San Francisco 85.5 +7 Grace 29 Seattle 90 +8 Heidi 31 Portland 95.5 +9 Ivan 26 Denver 88 +10 Judy 27 Austin 101.1 +11 QQ 24 cn 222.2 + +-- !hudi_0 -- +1 Alice 25 \N \N +2 Bob 30 \N \N +3 Charlie 28 New York \N +4 David 35 Los Angeles \N +5 Eve 28 Chicago \N +6 Frank 32 San Francisco 85.5 +7 Grace 29 Seattle 90 +8 Heidi 31 Portland 95.5 +9 Ivan 26 Denver 88 +10 Judy 27 Austin 101.1 +11 QQ 24 cn 222.2 + diff --git a/regression-test/data/external_table_p0/hudi/test_hudi_schema_evolution.out b/regression-test/data/external_table_p0/hudi/test_hudi_schema_evolution.out new file mode 100644 index 00000000000000..2c24db2b80fe23 --- /dev/null +++ b/regression-test/data/external_table_p0/hudi/test_hudi_schema_evolution.out @@ -0,0 +1,161 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !jni_adding_simple_columns_table_all -- +1 Alice \N \N +2 Bob \N \N +3 Cathy \N \N +4 David 25 New York +5 Eva 30 Los Angeles +6 Frank 28 Chicago + +-- !jni_adding_simple_columns_table_old_data -- +1 Alice \N \N +2 Bob \N \N +3 Cathy \N \N + +-- !jni_adding_simple_columns_table_new_data -- +4 David 25 New York +5 Eva 30 Los Angeles +6 Frank 28 Chicago + +-- !jni_deleting_simple_columns_table_all -- +1 Alice +2 Bob +3 Cathy +4 David +5 Eva +6 Frank + +-- !jni_deleting_simple_columns_table_old_data -- +1 Alice +2 Bob +3 Cathy + +-- !jni_deleting_simple_columns_table_new_data -- +4 David +5 Eva +6 Frank + +-- !jni_reordering_columns_table_all -- +1 Alice 25 +2 Bob 30 +3 Cathy 28 +4 David 26 +5 Eva 31 +6 Frank 29 + +-- !jni_adding_complex_columns_table_all -- +1 Alice {"age":25, "address":"Guangzhou", "email":null} +2 Bob {"age":30, "address":"Shanghai", "email":null} +3 Cathy {"age":28, "address":"Beijing", "email":null} +4 David {"age":25, "address":"Shenzhen", "email":"david@example.com"} +5 Eva {"age":30, "address":"Chengdu", "email":"eva@example.com"} +6 Frank {"age":28, "address":"Wuhan", "email":"frank@example.com"} + +-- !jni_adding_complex_columns_table_old_struct -- +1 Alice {"age":25, "address":"Guangzhou", "email":null} +2 Bob {"age":30, "address":"Shanghai", "email":null} +3 Cathy {"age":28, "address":"Beijing", "email":null} + +-- !jni_adding_complex_columns_table_new_struct -- +4 David {"age":25, "address":"Shenzhen", "email":"david@example.com"} +5 Eva {"age":30, "address":"Chengdu", "email":"eva@example.com"} +6 Frank {"age":28, "address":"Wuhan", "email":"frank@example.com"} + +-- !jni_deleting_complex_columns_table_all -- +1 Alice {"age":25, "address":"Guangzhou", "email":"alice@example.com"} +2 Bob {"age":30, "address":"Shanghai", "email":"bob@example.com"} +3 Cathy {"age":28, "address":"Beijing", "email":"cathy@example.com"} +4 David {"age":25, "address":"Shenzhen", "email":null} +5 Eva 
{"age":30, "address":"Chengdu", "email":null} +6 Frank {"age":28, "address":"Wuhan", "email":null} + +-- !jni_deleting_complex_columns_table_old_struct -- +1 Alice {"age":25, "address":"Guangzhou", "email":"alice@example.com"} +2 Bob {"age":30, "address":"Shanghai", "email":"bob@example.com"} +3 Cathy {"age":28, "address":"Beijing", "email":"cathy@example.com"} + +-- !jni_deleting_complex_columns_table_new_struct -- +4 David {"age":25, "address":"Shenzhen", "email":null} +5 Eva {"age":30, "address":"Chengdu", "email":null} +6 Frank {"age":28, "address":"Wuhan", "email":null} + +-- !native_adding_simple_columns_table_all -- +1 Alice \N \N +2 Bob \N \N +3 Cathy \N \N +4 David 25 New York +5 Eva 30 Los Angeles +6 Frank 28 Chicago + +-- !native_adding_simple_columns_table_old_data -- +1 Alice \N \N +2 Bob \N \N +3 Cathy \N \N + +-- !native_adding_simple_columns_table_new_data -- +4 David 25 New York +5 Eva 30 Los Angeles +6 Frank 28 Chicago + +-- !native_deleting_simple_columns_table_all -- +1 Alice +2 Bob +3 Cathy +4 David +5 Eva +6 Frank + +-- !native_deleting_simple_columns_table_old_data -- +1 Alice +2 Bob +3 Cathy + +-- !native_deleting_simple_columns_table_new_data -- +4 David +5 Eva +6 Frank + +-- !native_reordering_columns_table_all -- +1 Alice 25 +2 Bob 30 +3 Cathy 28 +4 David 26 +5 Eva 31 +6 Frank 29 + +-- !native_adding_complex_columns_table_all -- +1 Alice {"age":25, "address":"Guangzhou", "email":null} +2 Bob {"age":30, "address":"Shanghai", "email":null} +3 Cathy {"age":28, "address":"Beijing", "email":null} +4 David {"age":25, "address":"Shenzhen", "email":"david@example.com"} +5 Eva {"age":30, "address":"Chengdu", "email":"eva@example.com"} +6 Frank {"age":28, "address":"Wuhan", "email":"frank@example.com"} + +-- !native_adding_complex_columns_table_old_struct -- +1 Alice {"age":25, "address":"Guangzhou", "email":null} +2 Bob {"age":30, "address":"Shanghai", "email":null} +3 Cathy {"age":28, "address":"Beijing", "email":null} + +-- 
!native_adding_complex_columns_table_new_struct -- +4 David {"age":25, "address":"Shenzhen", "email":"david@example.com"} +5 Eva {"age":30, "address":"Chengdu", "email":"eva@example.com"} +6 Frank {"age":28, "address":"Wuhan", "email":"frank@example.com"} + +-- !native_deleting_complex_columns_table_all -- +1 Alice {"age":25, "address":"Guangzhou", "email":"alice@example.com"} +2 Bob {"age":30, "address":"Shanghai", "email":"bob@example.com"} +3 Cathy {"age":28, "address":"Beijing", "email":"cathy@example.com"} +4 David {"age":25, "address":"Shenzhen", "email":null} +5 Eva {"age":30, "address":"Chengdu", "email":null} +6 Frank {"age":28, "address":"Wuhan", "email":null} + +-- !native_deleting_complex_columns_table_old_struct -- +1 Alice {"age":25, "address":"Guangzhou", "email":"alice@example.com"} +2 Bob {"age":30, "address":"Shanghai", "email":"bob@example.com"} +3 Cathy {"age":28, "address":"Beijing", "email":"cathy@example.com"} + +-- !native_deleting_complex_columns_table_new_struct -- +4 David {"age":25, "address":"Shenzhen", "email":null} +5 Eva {"age":30, "address":"Chengdu", "email":null} +6 Frank {"age":28, "address":"Wuhan", "email":null} + diff --git a/regression-test/data/external_table_p0/hudi/test_hudi_snapshot.out b/regression-test/data/external_table_p0/hudi/test_hudi_snapshot.out new file mode 100644 index 00000000000000..d7b11e3d8378c7 --- /dev/null +++ b/regression-test/data/external_table_p0/hudi/test_hudi_snapshot.out @@ -0,0 +1,401 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !q01 -- +1 1710000000000 login +10 1710000009000 comment +2 1710000001000 click +3 1710000002000 logout +4 1710000003000 view +5 1710000004000 purchase +6 1710000005000 search +7 1710000006000 add_to_cart +8 1710000007000 remove_from_cart +9 1710000008000 share + +-- !q02 -- +1 1710000000000 login + +-- !q03 -- +1 1710000000000 login +2 1710000001000 click +3 1710000002000 logout +4 1710000003000 view + +-- !q04 -- +1 1710000000000 login + +-- !q05 -- +add_to_cart 1 +click 1 +comment 1 +login 1 +logout 1 +purchase 1 +remove_from_cart 1 +search 1 +share 1 +view 1 + +-- !q06 -- +1 login +2 click +3 logout +4 view +5 purchase + +-- !q01 -- +1 1710000000000 login 2024-03-01 +10 1710000009000 comment 2024-03-01 +2 1710000001000 click 2024-03-01 +3 1710000002000 logout 2024-03-02 +4 1710000003000 view 2024-03-01 +5 1710000004000 purchase 2024-03-02 +6 1710000005000 search 2024-03-01 +7 1710000006000 add_to_cart 2024-03-02 +8 1710000007000 remove_from_cart 2024-03-01 +9 1710000008000 share 2024-03-02 + +-- !q02 -- +1 1710000000000 login 2024-03-01 + +-- !q03 -- +1 1710000000000 login 2024-03-01 +2 1710000001000 click 2024-03-01 +3 1710000002000 logout 2024-03-02 +4 1710000003000 view 2024-03-01 + +-- !q04 -- +1 1710000000000 login 2024-03-01 + +-- !q05 -- +add_to_cart 1 +click 1 +comment 1 +login 1 +logout 1 +purchase 1 +remove_from_cart 1 +search 1 +share 1 +view 1 + +-- !q06 -- +1 login +2 click +3 logout +4 view +5 purchase + +-- !q07 -- +1 1710000000000 login 2024-03-01 +2 1710000001000 click 2024-03-01 +4 1710000003000 view 2024-03-01 +6 1710000005000 search 2024-03-01 +8 1710000007000 remove_from_cart 2024-03-01 + +-- !q08 -- +1 2024-03-01 +2 2024-03-01 +4 2024-03-01 +6 2024-03-01 +8 2024-03-01 + +-- !q01 -- +1 1710000000000 login +10 1710000009000 comment +2 1710000001000 click +3 1710000002000 logout +4 1710000003000 view +5 1710000004000 purchase +6 1710000005000 search +7 1710000006000 add_to_cart +8 
1710000007000 remove_from_cart +9 1710000008000 share + +-- !q02 -- +1 1710000000000 login + +-- !q03 -- +1 1710000000000 login +2 1710000001000 click +3 1710000002000 logout +4 1710000003000 view + +-- !q04 -- +1 1710000000000 login + +-- !q05 -- +add_to_cart 1 +click 1 +comment 1 +login 1 +logout 1 +purchase 1 +remove_from_cart 1 +search 1 +share 1 +view 1 + +-- !q06 -- +1 login +2 click +3 logout +4 view +5 purchase + +-- !q01 -- +1 1710000000000 login 2024-03-01 +10 1710000009000 comment 2024-03-01 +2 1710000001000 click 2024-03-01 +3 1710000002000 logout 2024-03-02 +4 1710000003000 view 2024-03-01 +5 1710000004000 purchase 2024-03-02 +6 1710000005000 search 2024-03-01 +7 1710000006000 add_to_cart 2024-03-02 +8 1710000007000 remove_from_cart 2024-03-01 +9 1710000008000 share 2024-03-02 + +-- !q02 -- +1 1710000000000 login 2024-03-01 + +-- !q03 -- +1 1710000000000 login 2024-03-01 +2 1710000001000 click 2024-03-01 +3 1710000002000 logout 2024-03-02 +4 1710000003000 view 2024-03-01 + +-- !q04 -- +1 1710000000000 login 2024-03-01 + +-- !q05 -- +add_to_cart 1 +click 1 +comment 1 +login 1 +logout 1 +purchase 1 +remove_from_cart 1 +search 1 +share 1 +view 1 + +-- !q06 -- +1 login +2 click +3 logout +4 view +5 purchase + +-- !q07 -- +1 1710000000000 login 2024-03-01 +2 1710000001000 click 2024-03-01 +4 1710000003000 view 2024-03-01 +6 1710000005000 search 2024-03-01 +8 1710000007000 remove_from_cart 2024-03-01 + +-- !q08 -- +1 2024-03-01 +2 2024-03-01 +4 2024-03-01 +6 2024-03-01 +8 2024-03-01 + +-- !q01 -- +1 1710000000000 login +10 1710000009000 comment +2 1710000001000 click +3 1710000002000 logout +4 1710000003000 view +5 1710000004000 purchase +6 1710000005000 search +7 1710000006000 add_to_cart +8 1710000007000 remove_from_cart +9 1710000008000 share + +-- !q02 -- +1 1710000000000 login + +-- !q03 -- +1 1710000000000 login +2 1710000001000 click +3 1710000002000 logout +4 1710000003000 view + +-- !q04 -- +1 1710000000000 login + +-- !q05 -- +add_to_cart 1 +click 
1 +comment 1 +login 1 +logout 1 +purchase 1 +remove_from_cart 1 +search 1 +share 1 +view 1 + +-- !q06 -- +1 login +2 click +3 logout +4 view +5 purchase + +-- !q01 -- +1 1710000000000 login 2024-03-01 +10 1710000009000 comment 2024-03-01 +2 1710000001000 click 2024-03-01 +3 1710000002000 logout 2024-03-02 +4 1710000003000 view 2024-03-01 +5 1710000004000 purchase 2024-03-02 +6 1710000005000 search 2024-03-01 +7 1710000006000 add_to_cart 2024-03-02 +8 1710000007000 remove_from_cart 2024-03-01 +9 1710000008000 share 2024-03-02 + +-- !q02 -- +1 1710000000000 login 2024-03-01 + +-- !q03 -- +1 1710000000000 login 2024-03-01 +2 1710000001000 click 2024-03-01 +3 1710000002000 logout 2024-03-02 +4 1710000003000 view 2024-03-01 + +-- !q04 -- +1 1710000000000 login 2024-03-01 + +-- !q05 -- +add_to_cart 1 +click 1 +comment 1 +login 1 +logout 1 +purchase 1 +remove_from_cart 1 +search 1 +share 1 +view 1 + +-- !q06 -- +1 login +2 click +3 logout +4 view +5 purchase + +-- !q07 -- +1 1710000000000 login 2024-03-01 +2 1710000001000 click 2024-03-01 +4 1710000003000 view 2024-03-01 +6 1710000005000 search 2024-03-01 +8 1710000007000 remove_from_cart 2024-03-01 + +-- !q08 -- +1 2024-03-01 +2 2024-03-01 +4 2024-03-01 +6 2024-03-01 +8 2024-03-01 + +-- !q01 -- +1 1710000000000 login +10 1710000009000 comment +2 1710000001000 click +3 1710000002000 logout +4 1710000003000 view +5 1710000004000 purchase +6 1710000005000 search +7 1710000006000 add_to_cart +8 1710000007000 remove_from_cart +9 1710000008000 share + +-- !q02 -- +1 1710000000000 login + +-- !q03 -- +1 1710000000000 login +2 1710000001000 click +3 1710000002000 logout +4 1710000003000 view + +-- !q04 -- +1 1710000000000 login + +-- !q05 -- +add_to_cart 1 +click 1 +comment 1 +login 1 +logout 1 +purchase 1 +remove_from_cart 1 +search 1 +share 1 +view 1 + +-- !q06 -- +1 login +2 click +3 logout +4 view +5 purchase + +-- !q01 -- +1 1710000000000 login 2024-03-01 +10 1710000009000 comment 2024-03-01 +2 1710000001000 click 
2024-03-01 +3 1710000002000 logout 2024-03-02 +4 1710000003000 view 2024-03-01 +5 1710000004000 purchase 2024-03-02 +6 1710000005000 search 2024-03-01 +7 1710000006000 add_to_cart 2024-03-02 +8 1710000007000 remove_from_cart 2024-03-01 +9 1710000008000 share 2024-03-02 + +-- !q02 -- +1 1710000000000 login 2024-03-01 + +-- !q03 -- +1 1710000000000 login 2024-03-01 +2 1710000001000 click 2024-03-01 +3 1710000002000 logout 2024-03-02 +4 1710000003000 view 2024-03-01 + +-- !q04 -- +1 1710000000000 login 2024-03-01 + +-- !q05 -- +add_to_cart 1 +click 1 +comment 1 +login 1 +logout 1 +purchase 1 +remove_from_cart 1 +search 1 +share 1 +view 1 + +-- !q06 -- +1 login +2 click +3 logout +4 view +5 purchase + +-- !q07 -- +1 1710000000000 login 2024-03-01 +2 1710000001000 click 2024-03-01 +4 1710000003000 view 2024-03-01 +6 1710000005000 search 2024-03-01 +8 1710000007000 remove_from_cart 2024-03-01 + +-- !q08 -- +1 2024-03-01 +2 2024-03-01 +4 2024-03-01 +6 2024-03-01 +8 2024-03-01 + diff --git a/regression-test/data/external_table_p0/hudi/test_hudi_timestamp.out b/regression-test/data/external_table_p0/hudi/test_hudi_timestamp.out new file mode 100644 index 00000000000000..2ed5018b9cd0a6 --- /dev/null +++ b/regression-test/data/external_table_p0/hudi/test_hudi_timestamp.out @@ -0,0 +1,31 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !timestamp1 -- +1 Alice 2024-10-25T08:00 +2 Bob 2024-10-25T09:30 +3 Charlie 2024-10-25T11:00 + +-- !timestamp2 -- +1 Alice 2024-10-25T23:00 +2 Bob 2024-10-26T00:30 +3 Charlie 2024-10-26T02:00 + +-- !timestamp3 -- +1 Alice 2024-10-25T15:00 +2 Bob 2024-10-25T16:30 +3 Charlie 2024-10-25T18:00 + +-- !timestamp1 -- +1 Alice 2024-10-25T08:00 +2 Bob 2024-10-25T09:30 +3 Charlie 2024-10-25T11:00 + +-- !timestamp2 -- +1 Alice 2024-10-25T23:00 +2 Bob 2024-10-26T00:30 +3 Charlie 2024-10-26T02:00 + +-- !timestamp3 -- +1 Alice 2024-10-25T15:00 +2 Bob 2024-10-25T16:30 +3 Charlie 2024-10-25T18:00 + diff --git a/regression-test/data/external_table_p0/hudi/test_hudi_timetravel.out b/regression-test/data/external_table_p0/hudi/test_hudi_timetravel.out new file mode 100644 index 00000000000000..2163a5e970024f --- /dev/null +++ b/regression-test/data/external_table_p0/hudi/test_hudi_timetravel.out @@ -0,0 +1,121 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !timetravel1 -- +2 + +-- !timetravel2 -- +4 + +-- !timetravel3 -- +6 + +-- !timetravel4 -- +8 + +-- !timetravel5 -- +10 + +-- !timetravel1 -- +2 + +-- !timetravel2 -- +4 + +-- !timetravel3 -- +6 + +-- !timetravel4 -- +8 + +-- !timetravel5 -- +10 + +-- !timetravel1 -- +2 + +-- !timetravel2 -- +4 + +-- !timetravel3 -- +6 + +-- !timetravel4 -- +8 + +-- !timetravel5 -- +10 + +-- !timetravel1 -- +2 + +-- !timetravel2 -- +4 + +-- !timetravel3 -- +6 + +-- !timetravel4 -- +8 + +-- !timetravel5 -- +10 + +-- !timetravel1 -- +2 + +-- !timetravel2 -- +4 + +-- !timetravel3 -- +6 + +-- !timetravel4 -- +8 + +-- !timetravel5 -- +10 + +-- !timetravel1 -- +2 + +-- !timetravel2 -- +4 + +-- !timetravel3 -- +6 + +-- !timetravel4 -- +8 + +-- !timetravel5 -- +10 + +-- !timetravel1 -- +2 + +-- !timetravel2 -- +4 + +-- !timetravel3 -- +6 + +-- !timetravel4 -- +8 + +-- !timetravel5 -- +10 + +-- !timetravel1 -- +2 + +-- !timetravel2 -- +4 + +-- !timetravel3 -- +6 + +-- !timetravel4 -- +8 + +-- !timetravel5 -- +10 + diff --git a/regression-test/pipeline/external/conf/regression-conf.groovy b/regression-test/pipeline/external/conf/regression-conf.groovy index 75299d7908292f..fc167d838b56a8 100644 --- a/regression-test/pipeline/external/conf/regression-conf.groovy +++ b/regression-test/pipeline/external/conf/regression-conf.groovy @@ -197,3 +197,12 @@ enableLakesoulTest = true // AWS iam role config +// hudi p0 external regression test config +// To enable hudi test, you need first start hudi container. 
+// See `docker/thirdparties/run-thirdparties-docker.sh -c hudi` +enableHudiTest=true +// hudi catalog config +hudiHmsPort=19083 +hudiMinioPort=19100 +hudiMinioAccessKey="minio" +hudiMinioSecretKey="minio123" diff --git a/regression-test/suites/external_table_p0/hudi/test_hudi_catalog.groovy b/regression-test/suites/external_table_p0/hudi/test_hudi_catalog.groovy new file mode 100644 index 00000000000000..2c7a51110822a2 --- /dev/null +++ b/regression-test/suites/external_table_p0/hudi/test_hudi_catalog.groovy @@ -0,0 +1,62 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +suite("test_hudi_catalog", "p0,external,hudi,external_docker,external_docker_hudi") { + String enabled = context.config.otherConfigs.get("enableHudiTest") + if (enabled == null || !enabled.equalsIgnoreCase("true")) { + logger.info("disable hudi test") + return + } + + String catalog_name = "test_hudi_catalog" + String externalEnvIp = context.config.otherConfigs.get("externalEnvIp") + String hudiHmsPort = context.config.otherConfigs.get("hudiHmsPort") + String hudiMinioPort = context.config.otherConfigs.get("hudiMinioPort") + String hudiMinioAccessKey = context.config.otherConfigs.get("hudiMinioAccessKey") + String hudiMinioSecretKey = context.config.otherConfigs.get("hudiMinioSecretKey") + + sql """drop catalog if exists ${catalog_name};""" + sql """ + create catalog if not exists ${catalog_name} properties ( + 'type'='hms', + 'hive.metastore.uris' = 'thrift://${externalEnvIp}:${hudiHmsPort}', + 's3.endpoint' = 'http://${externalEnvIp}:${hudiMinioPort}', + 's3.access_key' = '${hudiMinioAccessKey}', + 's3.secret_key' = '${hudiMinioSecretKey}', + 's3.region' = 'us-east-1', + 'use_path_style' = 'true' + ); + """ + + sql """ switch ${catalog_name};""" + sql """ use regression_hudi;""" + sql """ set enable_fallback_to_original_planner=false """ + def tables = sql """ show tables; """ + assertTrue(tables.size() > 0) + order_qt_test_select_table """ + select id, name, part1 from bigint_partition_tb order by id; + """ + try { + sql """ set force_jni_scanner = true; """ + order_qt_test_select_table """ + select id, name, part1 from bigint_partition_tb order by id; + """ + } finally { + sql """ set force_jni_scanner = false; """ + } +} + diff --git a/regression-test/suites/external_table_p0/hudi/test_hudi_full_schema_change.groovy b/regression-test/suites/external_table_p0/hudi/test_hudi_full_schema_change.groovy new file mode 100644 index 00000000000000..8a5bc532bde66c --- /dev/null +++ b/regression-test/suites/external_table_p0/hudi/test_hudi_full_schema_change.groovy 
@@ -0,0 +1,114 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_hudi_full_schema_change", "p0,external,hudi,external_docker,external_docker_hudi") { + String enabled = context.config.otherConfigs.get("enableHudiTest") + if (enabled == null || !enabled.equalsIgnoreCase("true")) { + logger.info("disable hudi test") + return + } + + String catalog_name = "test_hudi_full_schema_change" + String externalEnvIp = context.config.otherConfigs.get("externalEnvIp") + String hudiHmsPort = context.config.otherConfigs.get("hudiHmsPort") + String hudiMinioPort = context.config.otherConfigs.get("hudiMinioPort") + String hudiMinioAccessKey = context.config.otherConfigs.get("hudiMinioAccessKey") + String hudiMinioSecretKey = context.config.otherConfigs.get("hudiMinioSecretKey") + + sql """drop catalog if exists ${catalog_name};""" + sql """ + create catalog if not exists ${catalog_name} properties ( + 'type'='hms', + 'hive.metastore.uris' = 'thrift://${externalEnvIp}:${hudiHmsPort}', + 's3.endpoint' = 'http://${externalEnvIp}:${hudiMinioPort}', + 's3.access_key' = '${hudiMinioAccessKey}', + 's3.secret_key' = '${hudiMinioSecretKey}', + 's3.region' = 'us-east-1', + 'use_path_style' = 'true' + ); + """ + + sql """ switch 
${catalog_name};""" + sql """ use regression_hudi;""" + sql """ set enable_fallback_to_original_planner=false """ + sql """set force_jni_scanner = false;""" + + + def tables = ["hudi_full_schema_change_parquet","hudi_full_schema_change_orc"] + + + for (String table: tables) { + order_qt_all """ select id,new_map_column,struct_column, array_column,struct_column2 FROM ${table} ORDER BY id""" + + order_qt_country_usa """select id,new_map_column,struct_column, array_column,struct_column2 FROM ${table} WHERE STRUCT_ELEMENT(struct_column, 'country') = 'USA' ORDER BY id""" + order_qt_country_usa_cols """select id, STRUCT_ELEMENT(struct_column, 'city') AS city, STRUCT_ELEMENT(MAP_VALUES(new_map_column)[1], 'full_name') AS full_name, ARRAY_SIZE(array_column) AS array_size FROM ${table} WHERE STRUCT_ELEMENT(struct_column, 'country') = 'USA' ORDER BY id""" + + order_qt_city_new """select id,new_map_column,struct_column, array_column,struct_column2 FROM ${table} WHERE STRUCT_ELEMENT(struct_column, 'city') LIKE 'New%' ORDER BY id""" + order_qt_city_new_cols """select id, STRUCT_ELEMENT(struct_column, 'country') AS country, STRUCT_ELEMENT(MAP_VALUES(new_map_column)[1], 'age') AS age, STRUCT_ELEMENT(array_column[1], 'item') AS first_item FROM ${table} WHERE STRUCT_ELEMENT(struct_column, 'city') LIKE 'New%' ORDER BY id""" + + order_qt_age_over_30 """select id,new_map_column,struct_column, array_column,struct_column2 FROM ${table} WHERE STRUCT_ELEMENT(MAP_VALUES(new_map_column)[1], 'age') > 30 ORDER BY id""" + order_qt_age_over_30_cols """select id, STRUCT_ELEMENT(struct_column, 'city') AS city, STRUCT_ELEMENT(array_column[2], 'category') AS second_category FROM ${table} WHERE STRUCT_ELEMENT(MAP_VALUES(new_map_column)[1], 'age') > 30 ORDER BY id""" + + order_qt_age_under_25 """select id,new_map_column,struct_column, array_column,struct_column2 FROM ${table} WHERE STRUCT_ELEMENT(MAP_VALUES(new_map_column)[1], 'age') < 25 ORDER BY id""" + order_qt_age_under_25_cols """select id, 
STRUCT_ELEMENT(struct_column, 'country') AS country, MAP_KEYS(new_map_column)[1] AS map_key FROM ${table} WHERE STRUCT_ELEMENT(MAP_VALUES(new_map_column)[1], 'age') < 25 ORDER BY id""" + + order_qt_name_alice """select id,new_map_column,struct_column, array_column,struct_column2 FROM ${table} WHERE STRUCT_ELEMENT(MAP_VALUES(new_map_column)[1], 'full_name') = 'Alice' ORDER BY id""" + order_qt_name_alice_cols """select id, STRUCT_ELEMENT(struct_column, 'city') AS city, ARRAY_SIZE(array_column) AS array_size FROM ${table} WHERE STRUCT_ELEMENT(MAP_VALUES(new_map_column)[1], 'full_name') = 'Alice' ORDER BY id""" + + order_qt_name_j """select id,new_map_column,struct_column, array_column,struct_column2 FROM ${table} WHERE STRUCT_ELEMENT(MAP_VALUES(new_map_column)[1], 'full_name') like 'J%' ORDER BY id""" + order_qt_name_j_cols """select id, STRUCT_ELEMENT(struct_column, 'country') AS country, STRUCT_ELEMENT(MAP_VALUES(new_map_column)[1], 'gender') AS gender FROM ${table} WHERE STRUCT_ELEMENT(MAP_VALUES(new_map_column)[1], 'full_name') LIKE 'J%' ORDER BY id""" + + order_qt_map_person5 """select id,new_map_column,struct_column, array_column,struct_column2 FROM ${table} WHERE ARRAY_CONTAINS(MAP_KEYS(new_map_column), 'person5') ORDER BY id""" + order_qt_map_person5_cols """select id, STRUCT_ELEMENT(struct_column, 'city') AS city, STRUCT_ELEMENT(MAP_VALUES(new_map_column)[1], 'age') AS age FROM ${table} WHERE ARRAY_CONTAINS(MAP_KEYS(new_map_column), 'person5') ORDER BY id""" + + order_qt_array_size_2 """select id,new_map_column,struct_column, array_column,struct_column2 FROM ${table} WHERE ARRAY_SIZE(array_column) = 2 ORDER BY id""" + order_qt_array_size_2_cols """select id, STRUCT_ELEMENT(struct_column, 'country') AS country, STRUCT_ELEMENT(STRUCT_ELEMENT(struct_column2, 'b'), 'cc') AS b_cc FROM ${table} WHERE ARRAY_SIZE(array_column) = 2 ORDER BY id""" + + order_qt_quantity_not_null """select id,new_map_column,struct_column, array_column,struct_column2 FROM ${table} WHERE 
STRUCT_ELEMENT(array_column[1], 'quantity') IS NOT NULL ORDER BY id""" + order_qt_quantity_not_null_cols """select id, STRUCT_ELEMENT(struct_column, 'city') AS city, STRUCT_ELEMENT(MAP_VALUES(new_map_column)[1], 'full_name') AS full_name FROM ${table} WHERE STRUCT_ELEMENT(array_column[1], 'quantity') IS NOT NULL ORDER BY id""" + + order_qt_quantity_null """select id,new_map_column,struct_column, array_column,struct_column2 FROM ${table} WHERE STRUCT_ELEMENT(array_column[1], 'quantity') IS NULL ORDER BY id""" + order_qt_quantity_null_cols """select id, STRUCT_ELEMENT(struct_column, 'country') AS country, ARRAY_SIZE(array_column) AS array_size FROM ${table} WHERE STRUCT_ELEMENT(array_column[1], 'quantity') IS NULL ORDER BY id""" + + order_qt_struct2_not_null """select id,new_map_column,struct_column, array_column,struct_column2 FROM ${table} WHERE struct_column2 IS NOT NULL ORDER BY id""" + order_qt_struct2_not_null_cols """select id, STRUCT_ELEMENT(struct_column2, 'c') AS c_value, STRUCT_ELEMENT(STRUCT_ELEMENT(struct_column2, 'new_a'), 'new_aa') AS new_aa FROM ${table} WHERE struct_column2 IS NOT NULL ORDER BY id""" + + order_qt_struct2_null """select id,new_map_column,struct_column, array_column,struct_column2 FROM ${table} WHERE struct_column2 IS NULL ORDER BY id""" + order_qt_struct2_null_cols """select id, STRUCT_ELEMENT(struct_column, 'city') AS city FROM ${table} WHERE struct_column2 IS NULL ORDER BY id""" + + order_qt_cc_nested """select id,new_map_column,struct_column, array_column,struct_column2 FROM ${table} WHERE STRUCT_ELEMENT(STRUCT_ELEMENT(struct_column2, 'b'), 'cc') like 'NestedC%' ORDER BY id""" + order_qt_cc_nested_cols """select id, STRUCT_ELEMENT(struct_column2, 'c') AS c_value FROM ${table} WHERE STRUCT_ELEMENT(STRUCT_ELEMENT(struct_column2, 'b'), 'cc') LIKE 'NestedC%' ORDER BY id""" + + order_qt_c_over_20 """select id,new_map_column,struct_column, array_column,struct_column2 FROM ${table} WHERE STRUCT_ELEMENT(struct_column2, 'c') > 20 ORDER BY 
id""" + order_qt_c_over_20_cols """select id, STRUCT_ELEMENT(STRUCT_ELEMENT(struct_column2, 'b'), 'cc') AS b_cc FROM ${table} WHERE STRUCT_ELEMENT(struct_column2, 'c') > 20 ORDER BY id""" + + order_qt_new_aa_50 """select id,new_map_column,struct_column, array_column,struct_column2 FROM ${table} WHERE STRUCT_ELEMENT(STRUCT_ELEMENT(struct_column2, 'new_a'), 'new_aa') = 50 ORDER BY id""" + order_qt_new_aa_50_cols """select id, STRUCT_ELEMENT(struct_column2, 'c') AS c_value FROM ${table} WHERE STRUCT_ELEMENT(STRUCT_ELEMENT(struct_column2, 'new_a'), 'new_aa') = 50 ORDER BY id""" + + order_qt_gender_female """select id,new_map_column,struct_column, array_column,struct_column2 FROM ${table} WHERE STRUCT_ELEMENT(MAP_VALUES(new_map_column)[1], 'gender') = 'Female' ORDER BY id""" + order_qt_gender_female_cols """select id, STRUCT_ELEMENT(struct_column, 'city') AS city, ARRAY_SIZE(array_column) AS array_size FROM ${table} WHERE STRUCT_ELEMENT(MAP_VALUES(new_map_column)[1], 'gender') = 'Female' ORDER BY id""" + + order_qt_category_fruit """select id,new_map_column,struct_column, array_column,struct_column2 FROM ${table} WHERE STRUCT_ELEMENT(array_column[2], 'category') = 'Fruit' ORDER BY id""" + order_qt_category_fruit_cols """select id, STRUCT_ELEMENT(struct_column, 'country') AS country, STRUCT_ELEMENT(MAP_VALUES(new_map_column)[1], 'full_name') AS full_name FROM ${table} WHERE STRUCT_ELEMENT(array_column[2], 'category') = 'Fruit' ORDER BY id""" + + order_qt_category_vegetable """select id,new_map_column,struct_column, array_column,struct_column2 FROM ${table} WHERE STRUCT_ELEMENT(array_column[2], 'category') = 'Vegetable' ORDER BY id""" + order_qt_category_vegetable_cols """select id, STRUCT_ELEMENT(struct_column, 'city') AS city, STRUCT_ELEMENT(MAP_VALUES(new_map_column)[1], 'age') AS age FROM ${table} WHERE STRUCT_ELEMENT(array_column[2], 'category') = 'Vegetable' ORDER BY id""" + } + + sql """drop catalog if exists ${catalog_name};""" +} + diff --git 
a/regression-test/suites/external_table_p0/hudi/test_hudi_incremental.groovy b/regression-test/suites/external_table_p0/hudi/test_hudi_incremental.groovy new file mode 100644 index 00000000000000..07f3fedf7f988e --- /dev/null +++ b/regression-test/suites/external_table_p0/hudi/test_hudi_incremental.groovy @@ -0,0 +1,95 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +suite("test_hudi_incremental", "p0,external,hudi,external_docker,external_docker_hudi") { + String enabled = context.config.otherConfigs.get("enableHudiTest") + if (enabled == null || !enabled.equalsIgnoreCase("true")) { + logger.info("disable hudi test") + return + } + + String catalog_name = "test_hudi_incremental" + String externalEnvIp = context.config.otherConfigs.get("externalEnvIp") + String hudiHmsPort = context.config.otherConfigs.get("hudiHmsPort") + String hudiMinioPort = context.config.otherConfigs.get("hudiMinioPort") + String hudiMinioAccessKey = context.config.otherConfigs.get("hudiMinioAccessKey") + String hudiMinioSecretKey = context.config.otherConfigs.get("hudiMinioSecretKey") + + sql """drop catalog if exists ${catalog_name};""" + sql """ + create catalog if not exists ${catalog_name} properties ( + 'type'='hms', + 'hive.metastore.uris' = 'thrift://${externalEnvIp}:${hudiHmsPort}', + 's3.endpoint' = 'http://${externalEnvIp}:${hudiMinioPort}', + 's3.access_key' = '${hudiMinioAccessKey}', + 's3.secret_key' = '${hudiMinioSecretKey}', + 's3.region' = 'us-east-1', + 'use_path_style' = 'true' + ); + """ + + sql """ switch ${catalog_name};""" + sql """ use regression_hudi;""" + sql """ set enable_fallback_to_original_planner=false """ + + // Function to get commit timestamps dynamically from hudi_meta table function + def getCommitTimestamps = { table_name -> + def result = sql """ + SELECT timestamp + FROM hudi_meta("table"="${catalog_name}.regression_hudi.${table_name}", "query_type" = "timeline") + WHERE action = 'commit' OR action = 'deltacommit' + ORDER BY timestamp + """ + return result.collect { it[0] } + } + + def test_hudi_incremental_querys = { table_name, timestamps -> + timestamps.eachWithIndex { timestamp, index -> + def query_name = "qt_incremental_${index + 1}_end" + "${query_name}" """ select count(user_id) from ${table_name}@incr('beginTime' = '${timestamp}'); """ + query_name = "qt_incremental_${index + 1}_latest" + "${query_name}" 
""" select count(user_id) from ${table_name}@incr('beginTime' = '${timestamp}', 'endTime' = 'latest'); """ + query_name = "qt_incremental_earliest_${index + 1}" + "${query_name}" """ select count(user_id) from ${table_name}@incr('beginTime' = 'earliest', 'endTime' = '${timestamp}'); """ + if (index > 0) { + query_name = "qt_incremental_${index}_${index + 1}" + "${query_name}" """ select count(user_id) from ${table_name}@incr('beginTime' = '${timestamps[index - 1]}', 'endTime' = '${timestamp}'); """ + } + } + } + + // Get commit timestamps dynamically for each table + def timestamps_cow_non_partition = getCommitTimestamps("user_activity_log_cow_non_partition") + def timestamps_cow_partition = getCommitTimestamps("user_activity_log_cow_partition") + def timestamps_mor_non_partition = getCommitTimestamps("user_activity_log_mor_non_partition") + def timestamps_mor_partition = getCommitTimestamps("user_activity_log_mor_partition") + + sql """set force_jni_scanner=true;""" + // TODO: @suxiaogang223 don't support incremental query for cow table by jni reader + // test_hudi_incremental_querys("user_activity_log_cow_non_partition", timestamps_cow_non_partition) + // test_hudi_incremental_querys("user_activity_log_cow_partition", timestamps_cow_partition) + test_hudi_incremental_querys("user_activity_log_mor_non_partition", timestamps_mor_non_partition) + test_hudi_incremental_querys("user_activity_log_mor_partition", timestamps_mor_partition) + + sql """set force_jni_scanner=false;""" + test_hudi_incremental_querys("user_activity_log_cow_non_partition", timestamps_cow_non_partition) + test_hudi_incremental_querys("user_activity_log_cow_partition", timestamps_cow_partition) + test_hudi_incremental_querys("user_activity_log_mor_non_partition", timestamps_mor_non_partition) + test_hudi_incremental_querys("user_activity_log_mor_partition", timestamps_mor_partition) + + sql """drop catalog if exists ${catalog_name};""" +} diff --git 
a/regression-test/suites/external_table_p0/hudi/test_hudi_meta.groovy b/regression-test/suites/external_table_p0/hudi/test_hudi_meta.groovy new file mode 100644 index 00000000000000..6d8ded6ebae6cf --- /dev/null +++ b/regression-test/suites/external_table_p0/hudi/test_hudi_meta.groovy @@ -0,0 +1,94 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +suite("test_hudi_meta", "p0,external,hudi,external_docker,external_docker_hudi") { + String enabled = context.config.otherConfigs.get("enableHudiTest") + if (enabled == null || !enabled.equalsIgnoreCase("true")) { + logger.info("disable hudi test") + return + } + + String catalog_name = "test_hudi_meta" + String externalEnvIp = context.config.otherConfigs.get("externalEnvIp") + String hudiHmsPort = context.config.otherConfigs.get("hudiHmsPort") + String hudiMinioPort = context.config.otherConfigs.get("hudiMinioPort") + String hudiMinioAccessKey = context.config.otherConfigs.get("hudiMinioAccessKey") + String hudiMinioSecretKey = context.config.otherConfigs.get("hudiMinioSecretKey") + + sql """drop catalog if exists ${catalog_name};""" + sql """ + create catalog if not exists ${catalog_name} properties ( + 'type'='hms', + 'hive.metastore.uris' = 'thrift://${externalEnvIp}:${hudiHmsPort}', + 's3.endpoint' = 'http://${externalEnvIp}:${hudiMinioPort}', + 's3.access_key' = '${hudiMinioAccessKey}', + 's3.secret_key' = '${hudiMinioSecretKey}', + 's3.region' = 'us-east-1', + 'use_path_style' = 'true' + ); + """ + + sql """ switch ${catalog_name};""" + sql """ use regression_hudi;""" + sql """ set enable_fallback_to_original_planner=false """ + + // Query timeline and verify structure (action, state) without relying on specific timestamps + // For user_activity_log_cow_non_partition: expect 5 commits (we changed from 10 to 5 commits) + qt_hudi_meta1 """ + SELECT action, state + FROM hudi_meta("table"="${catalog_name}.regression_hudi.user_activity_log_cow_non_partition", "query_type" = "timeline") + ORDER BY timestamp; + """ + + // For user_activity_log_mor_non_partition: expect 5 deltacommits + qt_hudi_meta2 """ + SELECT action, state + FROM hudi_meta("table"="${catalog_name}.regression_hudi.user_activity_log_mor_non_partition", "query_type" = "timeline") + ORDER BY timestamp; + """ + + // For user_activity_log_cow_partition: expect 5 commits + qt_hudi_meta3 """ + SELECT 
action, state + FROM hudi_meta("table"="${catalog_name}.regression_hudi.user_activity_log_cow_partition", "query_type" = "timeline") + ORDER BY timestamp; + """ + + // Same table as hudi_meta3, should have same result + qt_hudi_meta4 """ + SELECT action, state + FROM hudi_meta("table"="${catalog_name}.regression_hudi.user_activity_log_cow_partition", "query_type" = "timeline") + ORDER BY timestamp; + """ + + // For timetravel_cow: expect 1 commit + qt_hudi_meta5 """ + SELECT action, state + FROM hudi_meta("table"="${catalog_name}.regression_hudi.timetravel_cow", "query_type" = "timeline") + ORDER BY timestamp; + """ + + // For timetravel_mor: expect 1 deltacommit + qt_hudi_meta6 """ + SELECT action, state + FROM hudi_meta("table"="${catalog_name}.regression_hudi.timetravel_mor", "query_type" = "timeline") + ORDER BY timestamp; + """ + + sql """drop catalog if exists ${catalog_name};""" +} + diff --git a/regression-test/suites/external_table_p0/hudi/test_hudi_mtmv.groovy b/regression-test/suites/external_table_p0/hudi/test_hudi_mtmv.groovy new file mode 100644 index 00000000000000..d363496a7a1bc1 --- /dev/null +++ b/regression-test/suites/external_table_p0/hudi/test_hudi_mtmv.groovy @@ -0,0 +1,277 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_hudi_mtmv", "p0,external,hudi,external_docker,external_docker_hudi") { + String enabled = context.config.otherConfigs.get("enableHudiTest") + if (enabled == null || !enabled.equalsIgnoreCase("true")) { + logger.info("disable hudi test") + return + } + String suiteName = "test_hudi_mtmv" + String catalogName = "${suiteName}_catalog" + String mvName = "${suiteName}_mv" + String dbName = context.config.getDbNameByFile(context.file) + String otherDbName = "${suiteName}_otherdb" + String tableName = "${suiteName}_table" + + String externalEnvIp = context.config.otherConfigs.get("externalEnvIp") + String hudiHmsPort = context.config.otherConfigs.get("hudiHmsPort") + String hudiMinioPort = context.config.otherConfigs.get("hudiMinioPort") + String hudiMinioAccessKey = context.config.otherConfigs.get("hudiMinioAccessKey") + String hudiMinioSecretKey = context.config.otherConfigs.get("hudiMinioSecretKey") + + sql """drop database if exists ${otherDbName}""" + sql """create database ${otherDbName}""" + sql """ + CREATE TABLE ${otherDbName}.${tableName} ( + `user_id` INT, + `num` INT + ) ENGINE=OLAP + DUPLICATE KEY(`user_id`) + DISTRIBUTED BY HASH(`user_id`) BUCKETS 2 + PROPERTIES ('replication_num' = '1') ; + """ + + sql """ + insert into ${otherDbName}.${tableName} values(1,2); + """ + + sql """drop catalog if exists ${catalogName}""" + sql """ + create catalog if not exists ${catalogName} properties ( + 'type'='hms', + 'hive.metastore.uris' = 'thrift://${externalEnvIp}:${hudiHmsPort}', + 's3.endpoint' = 'http://${externalEnvIp}:${hudiMinioPort}', + 's3.access_key' = '${hudiMinioAccessKey}', + 's3.secret_key' = '${hudiMinioSecretKey}', + 's3.region' = 'us-east-1', + 'use_path_style' = 'true' + ); + """ + + order_qt_base_table """ select id, age, par from ${catalogName}.hudi_mtmv_regression_test.hudi_table_1; """ + + sql """drop materialized view if exists 
${mvName};""" + + sql """ + CREATE MATERIALIZED VIEW ${mvName} + BUILD DEFERRED REFRESH AUTO ON MANUAL + partition by(`par`) + DISTRIBUTED BY RANDOM BUCKETS 2 + PROPERTIES ('replication_num' = '1') + AS + SELECT id, age, par FROM ${catalogName}.`hudi_mtmv_regression_test`.hudi_table_1; + """ + def showPartitionsResult = sql """show partitions from ${mvName}""" + logger.info("showPartitionsResult: " + showPartitionsResult.toString()) + assertTrue(showPartitionsResult.toString().contains("p_a")) + assertTrue(showPartitionsResult.toString().contains("p_b")) + + // refresh one partitions + sql """ + REFRESH MATERIALIZED VIEW ${mvName} partitions(p_a); + """ + waitingMTMVTaskFinishedByMvName(mvName) + order_qt_refresh_one_partition "SELECT id, age, par FROM ${mvName} " + + //refresh auto + sql """ + REFRESH MATERIALIZED VIEW ${mvName} auto + """ + waitingMTMVTaskFinishedByMvName(mvName) + order_qt_refresh_auto "SELECT id, age, par FROM ${mvName} " + order_qt_is_sync_before_rebuild "select SyncWithBaseTables from mv_infos('database'='${dbName}') where Name='${mvName}'" + + // rebuild catalog, should not Affects MTMV + sql """drop catalog if exists ${catalogName}""" + sql """ + create catalog if not exists ${catalogName} properties ( + 'type'='hms', + 'hive.metastore.uris' = 'thrift://${externalEnvIp}:${hudiHmsPort}', + 's3.endpoint' = 'http://${externalEnvIp}:${hudiMinioPort}', + 's3.access_key' = '${hudiMinioAccessKey}', + 's3.secret_key' = '${hudiMinioSecretKey}', + 's3.region' = 'us-east-1', + 'use_path_style' = 'true' + ); + """ + order_qt_is_sync_after_rebuild "select SyncWithBaseTables from mv_infos('database'='${dbName}') where Name='${mvName}'" + + // should refresh normal after catalog rebuild + sql """ + REFRESH MATERIALIZED VIEW ${mvName} complete + """ + waitingMTMVTaskFinishedByMvName(mvName) + order_qt_refresh_complete_rebuild "SELECT id, age, par FROM ${mvName} " + + sql """drop materialized view if exists ${mvName};""" + + // not have partition + sql """ 
+ CREATE MATERIALIZED VIEW ${mvName} + BUILD DEFERRED REFRESH AUTO ON MANUAL + KEY(`id`) + COMMENT "comment1" + DISTRIBUTED BY HASH(`id`) BUCKETS 2 + PROPERTIES ('replication_num' = '1',"grace_period"="333") + AS + SELECT id,age,par FROM ${catalogName}.`hudi_mtmv_regression_test`.hudi_table_1; + """ + order_qt_not_partition_before "select SyncWithBaseTables from mv_infos('database'='${dbName}') where Name='${mvName}'" + //should can refresh auto + sql """ + REFRESH MATERIALIZED VIEW ${mvName} auto + """ + waitingMTMVTaskFinishedByMvName(mvName) + order_qt_not_partition "SELECT id, age, par FROM ${mvName} " + order_qt_not_partition_after "select SyncWithBaseTables from mv_infos('database'='${dbName}') where Name='${mvName}'" + sql """drop materialized view if exists ${mvName};""" + + // refresh on schedule + // sql """ + // CREATE MATERIALIZED VIEW ${mvName} + // BUILD IMMEDIATE REFRESH COMPLETE ON SCHEDULE EVERY 10 SECOND STARTS "9999-12-13 21:07:09" + // KEY(`id`) + // COMMENT "comment1" + // DISTRIBUTED BY HASH(`id`) BUCKETS 2 + // PROPERTIES ('replication_num' = '1',"grace_period"="333") + // AS + // SELECT * FROM ${catalogName}.`hudi_mtmv_regression_test`.hudi_table_1; + // """ + // waitingMTMVTaskFinishedByMvName(mvName) + // sql """drop materialized view if exists ${mvName};""" + + // refresh on schedule + sql """ + CREATE MATERIALIZED VIEW ${mvName} + BUILD IMMEDIATE REFRESH AUTO ON commit + KEY(`id`) + COMMENT "comment1" + DISTRIBUTED BY HASH(`id`) BUCKETS 2 + PROPERTIES ('replication_num' = '1',"grace_period"="333") + AS + SELECT id,age,par FROM ${catalogName}.`hudi_mtmv_regression_test`.hudi_table_1; + """ + waitingMTMVTaskFinishedByMvName(mvName) + sql """drop materialized view if exists ${mvName};""" + + // cross db and join internal table + sql """ + CREATE MATERIALIZED VIEW ${mvName} + BUILD DEFERRED REFRESH AUTO ON MANUAL + partition by(`par`) + DISTRIBUTED BY RANDOM BUCKETS 2 + PROPERTIES ('replication_num' = '1') + AS + SELECT a.id, a.age, a.par, 
b.user_id, b.num FROM ${catalogName}.`hudi_mtmv_regression_test`.hudi_table_1 a left join internal.${otherDbName}.${tableName} b on a.id=b.user_id; + """ + def showJoinPartitionsResult = sql """show partitions from ${mvName}""" + logger.info("showJoinPartitionsResult: " + showJoinPartitionsResult.toString()) + assertTrue(showJoinPartitionsResult.toString().contains("p_a")) + assertTrue(showJoinPartitionsResult.toString().contains("p_b")) + + sql """ + REFRESH MATERIALIZED VIEW ${mvName} partitions(p_a); + """ + waitingMTMVTaskFinishedByMvName(mvName) + order_qt_join_one_partition "SELECT id, age, par, user_id, num FROM ${mvName} " + sql """drop materialized view if exists ${mvName};""" + + sql """ + CREATE MATERIALIZED VIEW ${mvName} + BUILD DEFERRED REFRESH AUTO ON MANUAL + partition by(`create_date`) + DISTRIBUTED BY RANDOM BUCKETS 2 + PROPERTIES ('replication_num' = '1') + AS + SELECT id, name, value, create_date FROM ${catalogName}.`hudi_mtmv_regression_test`.hudi_table_two_partitions; + """ + def showTwoPartitionsResult = sql """show partitions from ${mvName}""" + logger.info("showTwoPartitionsResult: " + showTwoPartitionsResult.toString()) + assertTrue(showTwoPartitionsResult.toString().contains("p_20200101")) + assertTrue(showTwoPartitionsResult.toString().contains("p_20380101")) + assertTrue(showTwoPartitionsResult.toString().contains("p_20380102")) + sql """ + REFRESH MATERIALIZED VIEW ${mvName} auto; + """ + waitingMTMVTaskFinishedByMvName(mvName) + order_qt_two_partition "SELECT id, name, value, create_date FROM ${mvName} " + sql """drop materialized view if exists ${mvName};""" + + sql """ + CREATE MATERIALIZED VIEW ${mvName} + BUILD DEFERRED REFRESH AUTO ON MANUAL + partition by(`create_date`) + DISTRIBUTED BY RANDOM BUCKETS 2 + PROPERTIES ('replication_num' = '1','partition_sync_limit'='2','partition_date_format'='%Y-%m-%d', + 'partition_sync_time_unit'='MONTH') + AS + SELECT id, name, value, create_date FROM 
${catalogName}.`hudi_mtmv_regression_test`.hudi_table_two_partitions; + """ + def showLimitPartitionsResult = sql """show partitions from ${mvName}""" + logger.info("showLimitPartitionsResult: " + showLimitPartitionsResult.toString()) + assertFalse(showLimitPartitionsResult.toString().contains("p_20200101")) + assertTrue(showLimitPartitionsResult.toString().contains("p_20380101")) + assertTrue(showLimitPartitionsResult.toString().contains("p_20380102")) + sql """ + REFRESH MATERIALIZED VIEW ${mvName} auto; + """ + waitingMTMVTaskFinishedByMvName(mvName) + order_qt_limit_partition "SELECT id, name, value, create_date FROM ${mvName} " + sql """drop materialized view if exists ${mvName};""" + + // not allow date trunc + test { + sql """ + CREATE MATERIALIZED VIEW ${mvName} + BUILD DEFERRED REFRESH AUTO ON MANUAL + partition by (date_trunc(`create_date`,'month')) + DISTRIBUTED BY RANDOM BUCKETS 2 + PROPERTIES ('replication_num' = '1','partition_sync_limit'='2','partition_date_format'='%Y-%m-%d', + 'partition_sync_time_unit'='MONTH') + AS + SELECT id, name, value, create_date FROM ${catalogName}.`hudi_mtmv_regression_test`.hudi_table_two_partitions; + """ + exception "only support" + } + + sql """ + CREATE MATERIALIZED VIEW ${mvName} + BUILD DEFERRED REFRESH AUTO ON MANUAL + partition by(`region`) + DISTRIBUTED BY RANDOM BUCKETS 2 + PROPERTIES ('replication_num' = '1') + AS + SELECT id, name, value, region FROM ${catalogName}.`hudi_mtmv_regression_test`.hudi_table_null_partition; + """ + def showNullPartitionsResult = sql """show partitions from ${mvName}""" + logger.info("showNullPartitionsResult: " + showNullPartitionsResult.toString()) + // assertTrue(showNullPartitionsResult.toString().contains("p_null")) + assertTrue(showNullPartitionsResult.toString().contains("p_NULL")) + assertTrue(showNullPartitionsResult.toString().contains("p_bj")) + sql """ + REFRESH MATERIALIZED VIEW ${mvName} auto; + """ + waitingMTMVTaskFinishedByMvName(mvName) + // Will lose null data + 
order_qt_null_partition "SELECT id, name, value, region FROM ${mvName} " + sql """drop materialized view if exists ${mvName};""" + + sql """drop catalog if exists ${catalogName}""" + +} + diff --git a/regression-test/suites/external_table_p0/hudi/test_hudi_olap_rewrite_mtmv.groovy b/regression-test/suites/external_table_p0/hudi/test_hudi_olap_rewrite_mtmv.groovy new file mode 100644 index 00000000000000..5f290ea7237e56 --- /dev/null +++ b/regression-test/suites/external_table_p0/hudi/test_hudi_olap_rewrite_mtmv.groovy @@ -0,0 +1,121 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
// Verifies async materialized-view (MTMV) query rewrite over a join between a
// Hudi external table and an internal OLAP table.  Requires the dockerized
// Hudi environment (enableHudiTest=true in the regression config).
suite("test_hudi_olap_rewrite_mtmv", "p0,external,hudi,external_docker,external_docker_hudi") {
    String enabled = context.config.otherConfigs.get("enableHudiTest")
    if (enabled == null || !enabled.equalsIgnoreCase("true")) {
        logger.info("disable hudi test")
        return
    }
    String suiteName = "test_hudi_olap_rewrite_mtmv"
    String catalogName = "${suiteName}_catalog"
    String mvName = "${suiteName}_mv"
    String dbName = context.config.getDbNameByFile(context.file)
    String tableName = "${suiteName}_table"

    // Internal OLAP table that the external Hudi table is joined against.
    sql """drop table if exists ${tableName}"""
    sql """
        CREATE TABLE ${tableName} (
            `user_id` INT,
            `num` INT
        ) ENGINE=OLAP
        DUPLICATE KEY(`user_id`)
        DISTRIBUTED BY HASH(`user_id`) BUCKETS 2
        PROPERTIES ('replication_num' = '1') ;
    """
    sql """
        insert into ${tableName} values(1,2);
    """

    // Collect / pin statistics so the cost-based MV rewrite has row counts.
    sql """analyze table internal.`${dbName}`. ${tableName} with sync"""
    sql """alter table internal.`${dbName}`. ${tableName} modify column user_id set stats ('row_count'='1');"""

    // Connection details for the dockerized Hudi environment (HMS + MinIO).
    String externalEnvIp = context.config.otherConfigs.get("externalEnvIp")
    String hudiHmsPort = context.config.otherConfigs.get("hudiHmsPort")
    String hudiMinioPort = context.config.otherConfigs.get("hudiMinioPort")
    String hudiMinioAccessKey = context.config.otherConfigs.get("hudiMinioAccessKey")
    String hudiMinioSecretKey = context.config.otherConfigs.get("hudiMinioSecretKey")

    // Allow the optimizer to rewrite queries against MVs that reference external tables.
    sql """set materialized_view_rewrite_enable_contain_external_table=true;"""
    // Query the MV is defined on, and the one the rewrite is later checked against.
    String mvSql = "SELECT a.id, a.age, a.par, b.user_id, b.num FROM ${catalogName}.`hudi_mtmv_regression_test`.hudi_table_1 a left join ${tableName} b on a.id=b.user_id;";

    sql """drop catalog if exists ${catalogName}"""
    sql """
        create catalog if not exists ${catalogName} properties (
            'type'='hms',
            'hive.metastore.uris' = 'thrift://${externalEnvIp}:${hudiHmsPort}',
            's3.endpoint' = 'http://${externalEnvIp}:${hudiMinioPort}',
            's3.access_key' = '${hudiMinioAccessKey}',
            's3.secret_key' = '${hudiMinioSecretKey}',
            's3.region' = 'us-east-1',
            'use_path_style' = 'true'
        );
    """

    sql """analyze table ${catalogName}.`hudi_mtmv_regression_test`.hudi_table_1 with sync"""
    sql """alter table ${catalogName}.`hudi_mtmv_regression_test`.hudi_table_1 modify column par set stats ('row_count'='10');"""

    sql """drop materialized view if exists ${mvName};"""

    // MV partitioned by the Hudi table's partition column `par`.
    sql """
        CREATE MATERIALIZED VIEW ${mvName}
        BUILD DEFERRED REFRESH AUTO ON MANUAL
        partition by(`par`)
        DISTRIBUTED BY RANDOM BUCKETS 2
        PROPERTIES ('replication_num' = '1')
        AS
        ${mvSql}
    """
    // p_a / p_b come from the `par` values of the pre-loaded docker dataset
    // (assumption based on the assertions below — confirm against the loader script).
    def showPartitionsResult = sql """show partitions from ${mvName}"""
    logger.info("showPartitionsResult: " + showPartitionsResult.toString())
    assertTrue(showPartitionsResult.toString().contains("p_a"))
    assertTrue(showPartitionsResult.toString().contains("p_b"))

    // refresh one partitions
    sql """
        REFRESH MATERIALIZED VIEW ${mvName} partitions(p_a);
    """
    waitingMTMVTaskFinishedByMvName(mvName)
    order_qt_refresh_one_partition "SELECT id, age, par, user_id, num FROM ${mvName} "

    // With only p_a refreshed, the rewritten plan must union MV data with
    // base-table data for the stale partition — hence the VUNION node.
    def explainOnePartition = sql """ explain ${mvSql} """
    logger.info("explainOnePartition: " + explainOnePartition.toString())
    assertTrue(explainOnePartition.toString().contains("VUNION"))
    order_qt_refresh_one_partition_rewrite "${mvSql}"

    mv_rewrite_success("${mvSql}", "${mvName}")

    // select p_b should not rewrite
    mv_not_part_in("SELECT a.id, a.age, a.par, b.user_id, b.num FROM ${catalogName}.`hudi_mtmv_regression_test`.hudi_table_1 a left join ${tableName} b on a.id=b.user_id where a.par='b';", "${mvName}")

    //refresh auto
    sql """
        REFRESH MATERIALIZED VIEW ${mvName} auto
    """
    waitingMTMVTaskFinishedByMvName(mvName)
    order_qt_refresh_auto "SELECT id, age, par, user_id, num FROM ${mvName} "

    // Once every partition is fresh the query should read the MV directly
    // (internal OLAP scan) instead of unioning with the Hudi table.
    def explainAllPartition = sql """ explain ${mvSql}; """
    logger.info("explainAllPartition: " + explainAllPartition.toString())
    assertTrue(explainAllPartition.toString().contains("VOlapScanNode"))
    order_qt_refresh_all_partition_rewrite "${mvSql}"

    mv_rewrite_success("${mvSql}", "${mvName}")

    sql """drop materialized view if exists ${mvName};"""
    sql """drop catalog if exists ${catalogName}"""
}
// Smoke-tests reading Hudi tables whose base files are stored as ORC, covering
// both copy-on-write (cow) and merge-on-read (mor) table types.
suite("test_hudi_orc_tables", "p0,external,hudi,external_docker,external_docker_hudi") {
    String enabled = context.config.otherConfigs.get("enableHudiTest")
    if (enabled == null || !enabled.equalsIgnoreCase("true")) {
        logger.info("disable hudi test")
        return
    }

    // Docker-environment settings injected by the regression framework.
    def conf = context.config.otherConfigs
    String catalog_name = "test_hudi_orc_tables"
    String externalEnvIp = conf.get("externalEnvIp")
    String hudiHmsPort = conf.get("hudiHmsPort")
    String hudiMinioPort = conf.get("hudiMinioPort")
    String hudiMinioAccessKey = conf.get("hudiMinioAccessKey")
    String hudiMinioSecretKey = conf.get("hudiMinioSecretKey")

    // Recreate the HMS catalog pointing at the dockerized metastore + MinIO.
    sql """drop catalog if exists ${catalog_name};"""
    sql """
        create catalog if not exists ${catalog_name} properties (
            'type'='hms',
            'hive.metastore.uris' = 'thrift://${externalEnvIp}:${hudiHmsPort}',
            's3.endpoint' = 'http://${externalEnvIp}:${hudiMinioPort}',
            's3.access_key' = '${hudiMinioAccessKey}',
            's3.secret_key' = '${hudiMinioSecretKey}',
            's3.region' = 'us-east-1',
            'use_path_style' = 'true'
        );
    """

    sql """ switch ${catalog_name};"""
    sql """ use regression_hudi;"""
    sql """ set enable_fallback_to_original_planner=false """

    // Expected rows are pinned in this suite's .out file.
    qt_cow """ select id, name, value from orc_hudi_table_cow; """
    qt_mor """ select id, name, value from orc_hudi_table_mor; """

    sql """drop catalog if exists ${catalog_name};"""
}
// Exercises Hudi partition pruning through an HMS catalog: one/two/three-level
// partitions, prunes down to zero partitions, time-travel reads, and every
// supported partition-column type.  The whole body runs twice, once per value
// of 'use_hive_sync_partition'.
//
// Naming convention: the trailing token of each query variable encodes the
// expected number of partitions kept after pruning (e.g. two_partition_3_2
// keeps 2 of 4; *_all keeps every partition; *_0 prunes everything).
suite("test_hudi_partition_prune", "p0,external,hudi,external_docker,external_docker_hudi") {
    String enabled = context.config.otherConfigs.get("enableHudiTest")
    if (enabled == null || !enabled.equalsIgnoreCase("true")) {
        logger.info("disable hudi test")
        return
    }

    String catalog_name = "test_hudi_partition_prune"
    String externalEnvIp = context.config.otherConfigs.get("externalEnvIp")
    String hudiHmsPort = context.config.otherConfigs.get("hudiHmsPort")
    String hudiMinioPort = context.config.otherConfigs.get("hudiMinioPort")
    String hudiMinioAccessKey = context.config.otherConfigs.get("hudiMinioAccessKey")
    String hudiMinioSecretKey = context.config.otherConfigs.get("hudiMinioSecretKey")

    sql """drop catalog if exists ${catalog_name};"""

    for (String use_hive_sync_partition : ['true','false']) {

        sql """
            create catalog if not exists ${catalog_name} properties (
                'type'='hms',
                'hive.metastore.uris' = 'thrift://${externalEnvIp}:${hudiHmsPort}',
                's3.endpoint' = 'http://${externalEnvIp}:${hudiMinioPort}',
                's3.access_key' = '${hudiMinioAccessKey}',
                's3.secret_key' = '${hudiMinioSecretKey}',
                's3.region' = 'us-east-1',
                'use_path_style' = 'true',
                'use_hive_sync_partition' = '${use_hive_sync_partition}'
            );
        """

        sql """ switch ${catalog_name};"""
        sql """ use regression_hudi;"""
        sql """ set enable_fallback_to_original_planner=false """

        // Function to get commit timestamps dynamically from hudi_meta table function
        def getCommitTimestamps = { table_name ->
            def result = sql """
                SELECT timestamp
                FROM hudi_meta("table"="${catalog_name}.regression_hudi.${table_name}", "query_type" = "timeline")
                WHERE action = 'commit' OR action = 'deltacommit'
                ORDER BY timestamp
            """
            return result.collect { it[0] }
        }

        // Get commit timestamps for two_partition_tb (used in time travel queries)
        def timestamps_two_partition = getCommitTimestamps("two_partition_tb")

        // --- query definitions: single-level partition table (2 partitions) ---
        def one_partition_1_1 = """SELECT id,name,part1 FROM one_partition_tb WHERE part1 = 2024 ORDER BY id;"""
        def one_partition_2_1 = """SELECT id,name,part1 FROM one_partition_tb WHERE part1 = 2025 ORDER BY id;"""
        def one_partition_3_all = """SELECT id,name,part1 FROM one_partition_tb ORDER BY id;"""
        def one_partition_4_all = """SELECT id,name,part1 FROM one_partition_tb WHERE id = 5 ORDER BY id;"""
        def one_partition_5_1 = """SELECT id,name,part1 FROM one_partition_tb WHERE part1 = 2024 AND id >= 3 ORDER BY id;"""

        // --- two-level partition table (4 partitions) ---
        def two_partition_1_1 = """SELECT id,name,part1,part2 FROM two_partition_tb WHERE part1 = 'US' AND part2 = 1 ORDER BY id;"""
        def two_partition_2_1 = """SELECT id,name,part1,part2 FROM two_partition_tb WHERE part1 = 'EU' AND part2 = 2 ORDER BY id;"""
        def two_partition_3_2 = """SELECT id,name,part1,part2 FROM two_partition_tb WHERE part1 = 'US' ORDER BY id;"""
        def two_partition_4_all = """SELECT id,name,part1,part2 FROM two_partition_tb ORDER BY id;"""
        def two_partition_5_1 = """SELECT id,name,part1,part2 FROM two_partition_tb WHERE part1 = 'US' AND part2 = 2 AND id > 5 ORDER BY id;"""
        def two_partition_6_1 = """SELECT id,name,part1,part2 FROM two_partition_tb WHERE part1 = 'EU' AND part2 = 2 ORDER BY id;"""

        // --- three-level partition table (10 partitions) ---
        def three_partition_1_1 = """SELECT id,name,part1,part2,part3 FROM three_partition_tb WHERE part1 = 'US' AND part2 = 2024 AND part3 = 'Q1' ORDER BY id;"""
        def three_partition_2_1 = """SELECT id,name,part1,part2,part3 FROM three_partition_tb WHERE part1 = 'EU' AND part2 = 2025 AND part3 = 'Q2' ORDER BY id;"""
        def three_partition_3_3 = """SELECT id,name,part1,part2,part3 FROM three_partition_tb WHERE part1 = 'AS' AND part2 = 2025 ORDER BY id;"""
        def three_partition_4_2 = """SELECT id,name,part1,part2,part3 FROM three_partition_tb WHERE part1 = 'US' AND part3 = 'Q1' ORDER BY id;"""
        def three_partition_5_all = """SELECT id,name,part1,part2,part3 FROM three_partition_tb ORDER BY id;"""
        def three_partition_6_1 = """SELECT id,name,part1,part2,part3 FROM three_partition_tb WHERE part1 = 'EU' AND part2 = 2024 AND part3 = 'Q1' ORDER BY id;"""
        def three_partition_7_7 = """SELECT id,name,part1,part2,part3 FROM three_partition_tb WHERE part2 = 2025 ORDER BY id;"""
        def three_partition_8_2 = """SELECT id,name,part1,part2,part3 FROM three_partition_tb WHERE part1 = 'US' AND part3 = 'Q2' AND id BETWEEN 6 AND 10 ORDER BY id;"""

        // --- one table per partition-column type, each with 2 partitions ---
        def one_partition_boolean = """SELECT id,name,part1 FROM boolean_partition_tb WHERE part1 = true ORDER BY id;"""
        def one_partition_tinyint = """SELECT id,name,part1 FROM tinyint_partition_tb WHERE part1 = 1 ORDER BY id;"""
        def one_partition_smallint = """SELECT id,name,part1 FROM smallint_partition_tb WHERE part1 = 10 ORDER BY id;"""
        def one_partition_int = """SELECT id,name,part1 FROM int_partition_tb WHERE part1 = 100 ORDER BY id;"""
        def one_partition_bigint = """SELECT id,name,part1 FROM bigint_partition_tb WHERE part1 = 1234567890 ORDER BY id;"""
        def one_partition_string = """SELECT id,name,part1 FROM string_partition_tb WHERE part1 = 'RegionA' ORDER BY id;"""
        def one_partition_date = """SELECT id,name,part1 FROM date_partition_tb WHERE part1 = '2023-12-01' ORDER BY id;"""
        def one_partition_timestamp = """SELECT id,name,part1 FROM timestamp_partition_tb WHERE part1 = '2023-12-01 08:00:00' ORDER BY id;"""

        // Each check below runs the query (result pinned in the .out file) and
        // asserts the planner's "partition=kept/total" counter via explain.
        qt_one_partition_1_1 one_partition_1_1
        explain {
            sql("${one_partition_1_1}")
            contains "partition=1/2"
        }

        qt_one_partition_2_1 one_partition_2_1
        explain {
            sql("${one_partition_2_1}")
            contains "partition=1/2"
        }

        qt_one_partition_3_all one_partition_3_all
        explain {
            sql("${one_partition_3_all}")
            contains "partition=2/2"
        }

        qt_one_partition_4_all one_partition_4_all
        explain {
            sql("${one_partition_4_all}")
            contains "partition=2/2"
        }

        qt_one_partition_5_1 one_partition_5_1
        explain {
            sql("${one_partition_5_1}")
            contains "partition=1/2"
        }


        qt_two_partition_1_1 two_partition_1_1
        explain {
            sql("${two_partition_1_1}")
            contains "partition=1/4"
        }

        qt_two_partition_2_1 two_partition_2_1
        explain {
            sql("${two_partition_2_1}")
            contains "partition=1/4"
        }

        qt_two_partition_3_2 two_partition_3_2
        explain {
            sql("${two_partition_3_2}")
            contains "partition=2/4"
        }

        qt_two_partition_4_all two_partition_4_all
        explain {
            sql("${two_partition_4_all}")
            contains "partition=4/4"
        }

        qt_two_partition_5_1 two_partition_5_1
        explain {
            sql("${two_partition_5_1}")
            contains "partition=1/4"
        }

        qt_two_partition_6_1 two_partition_6_1
        explain {
            sql("${two_partition_6_1}")
            contains "partition=1/4"
        }



        qt_three_partition_1_1 three_partition_1_1
        explain {
            sql("${three_partition_1_1}")
            contains "partition=1/10"
        }

        qt_three_partition_2_1 three_partition_2_1
        explain {
            sql("${three_partition_2_1}")
            contains "partition=1/10"
        }

        qt_three_partition_3_3 three_partition_3_3
        explain {
            sql("${three_partition_3_3}")
            contains "partition=3/10"
        }

        qt_three_partition_4_2 three_partition_4_2
        explain {
            sql("${three_partition_4_2}")
            contains "partition=2/10"
        }

        qt_three_partition_5_all three_partition_5_all
        explain {
            sql("${three_partition_5_all}")
            contains "partition=10/10"
        }

        qt_three_partition_6_1 three_partition_6_1
        explain {
            sql("${three_partition_6_1}")
            contains "partition=1/10"
        }

        qt_three_partition_7_7 three_partition_7_7
        explain {
            sql("${three_partition_7_7}")
            contains "partition=7/10"
        }

        qt_three_partition_8_2 three_partition_8_2
        explain {
            sql("${three_partition_8_2}")
            contains "partition=2/10"
        }


        // 0 partitions
        // Predicates that match no existing partition must prune everything.
        def one_partition_6_0 = """SELECT id,name,part1 FROM one_partition_tb WHERE part1 = 2023 ORDER BY id;"""
        qt_one_partition_6_0 one_partition_6_0
        explain {
            sql("${one_partition_6_0}")
            contains "partition=0/2"
        }

        def two_partition_7_0 = """SELECT id,name,part1 FROM two_partition_tb WHERE part1 = 'CN' AND part2 = 1 ORDER BY id;"""
        qt_two_partition_7_0 two_partition_7_0
        explain {
            sql("${two_partition_7_0}")
            contains "partition=0/4"
        }

        def two_partition_8_0 = """SELECT id,name,part1 FROM two_partition_tb WHERE part1 = 'US' AND part2 = 3 ORDER BY id;"""
        qt_two_partition_8_0 two_partition_8_0
        explain {
            sql("${two_partition_8_0}")
            contains "partition=0/4"
        }

        def three_partition_9_0 = """SELECT id,name,part1 FROM three_partition_tb WHERE part1 = 'US' AND part2 = 2023 AND part3 = 'Q1' ORDER BY id;"""
        qt_three_partition_9_0 three_partition_9_0
        explain {
            sql("${three_partition_9_0}")
            contains "partition=0/10"
        }

        def three_partition_10_0 = """SELECT id,name,part1 FROM three_partition_tb WHERE part1 = 'EU' AND part2 = 2024 AND part3 = 'Q4' ORDER BY id;"""
        qt_three_partition_10_0 three_partition_10_0
        explain {
            sql("${three_partition_10_0}")
            contains "partition=0/10"
        }

        def three_partition_11_0 = """SELECT id,name,part1 FROM three_partition_tb WHERE part1 = 'AS' AND part2 = 2025 AND part3 = 'Q4' ORDER BY id;"""
        qt_three_partition_11_0 three_partition_11_0
        explain {
            sql("${three_partition_11_0}")
            contains "partition=0/10"
        }


        //time travel - use dynamic commit timestamps
        // Note: two_partition_tb has 10 INSERT statements, creating 10 commits
        // Final partitions: (US,1), (US,2), (EU,1), (EU,2) = 4 partitions total
        // Guarded so the suite still passes if the dataset has fewer commits.
        if (timestamps_two_partition.size() >= 10) {
            // Use the last commit timestamp (all 10 records, 4 partitions)
            def last_commit = timestamps_two_partition[9]
            def time_travel_two_partition_1_4 = "select id,name,part1,part2 from two_partition_tb FOR TIME AS OF '${last_commit}' order by id;"
            def time_travel_two_partition_2_2 = "select id,name,part1,part2 from two_partition_tb FOR TIME AS OF '${last_commit}' where part1='US' order by id;"
            def time_travel_two_partition_3_2 = "select id,name,part1,part2 from two_partition_tb FOR TIME AS OF '${last_commit}' where part2=2 order by id;"
            def time_travel_two_partition_4_0 = "select id,name,part1,part2 from two_partition_tb FOR TIME AS OF '${last_commit}' where part2=10 order by id;"

            qt_time_travel_two_partition_1_4 time_travel_two_partition_1_4
            explain {
                sql("${time_travel_two_partition_1_4}")
                contains "partition=4/4"
            }

            qt_time_travel_two_partition_2_2 time_travel_two_partition_2_2
            explain {
                sql("${time_travel_two_partition_2_2}")
                contains "partition=2/4"
            }

            qt_time_travel_two_partition_3_2 time_travel_two_partition_3_2
            explain {
                sql("${time_travel_two_partition_3_2}")
                contains "partition=2/4"
            }

            qt_time_travel_two_partition_4_0 time_travel_two_partition_4_0
            explain {
                sql("${time_travel_two_partition_4_0}")
                contains "partition=0/4"
            }

            // Use the first commit (after first INSERT: 1 record in partition US,1)
            def first_commit = timestamps_two_partition[0]
            def time_travel_two_partition_5_1 = "select id,name,part1,part2 from two_partition_tb FOR TIME AS OF '${first_commit}' order by id;"
            qt_time_travel_two_partition_5_1 time_travel_two_partition_5_1
            explain {
                sql("${time_travel_two_partition_5_1}")
                // First commit should have 1 record in partition (US, part2=1)
                contains "partition=1/1"
            }

            // Use a middle commit (after 3 inserts: 3 records in partition US,1)
            def middle_commit = timestamps_two_partition[2]
            def time_travel_two_partition_6_1 = "select id,name,part1,part2 from two_partition_tb FOR TIME AS OF '${middle_commit}' order by id;"
            qt_time_travel_two_partition_6_1 time_travel_two_partition_6_1
            explain {
                sql("${time_travel_two_partition_6_1}")
                // After 3 inserts, should have 1 partition (US, part2=1) with 3 records
                contains "partition=1/1"
            }
        }

        // all types as partition
        qt_one_partition_boolean one_partition_boolean
        explain {
            sql("${one_partition_boolean}")
            contains "partition=1/2"
        }
        qt_one_partition_tinyint one_partition_tinyint
        explain {
            sql("${one_partition_tinyint}")
            contains "partition=1/2"
        }
        qt_one_partition_smallint one_partition_smallint
        explain {
            sql("${one_partition_smallint}")
            contains "partition=1/2"
        }
        qt_one_partition_int one_partition_int
        explain {
            sql("${one_partition_int}")
            contains "partition=1/2"
        }
        qt_one_partition_bigint one_partition_bigint
        explain {
            sql("${one_partition_bigint}")
            contains "partition=1/2"
        }
        qt_one_partition_string one_partition_string
        explain {
            sql("${one_partition_string}")
            contains "partition=1/2"
        }
        qt_one_partition_date one_partition_date
        explain {
            sql("${one_partition_date}")
            contains "partition=1/2"
        }
        qt_one_partition_timestamp one_partition_timestamp
        explain {
            sql("${one_partition_timestamp}")
            contains "partition=1/2"
        }

        // Drop the catalog so the next loop iteration recreates it with the
        // other use_hive_sync_partition setting.
        sql """drop catalog if exists ${catalog_name};"""


    }

}
See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

// Verifies transparent materialized-view (MTMV) rewrite over a Hudi external
// table: builds a partitioned async MV on hudi_table_1, refreshes one
// partition then all partitions, and checks that matching queries are
// rewritten to hit the MV (and that queries on an unrefreshed partition are not).
suite("test_hudi_rewrite_mtmv", "p0,external,hudi,external_docker,external_docker_hudi") {
    String enabled = context.config.otherConfigs.get("enableHudiTest")
    if (enabled == null || !enabled.equalsIgnoreCase("true")) {
        logger.info("disable hudi test")
        return
    }
    String suiteName = "test_hudi_rewrite_mtmv"
    String catalogName = "${suiteName}_catalog"
    String mvName = "${suiteName}_mv"

    // Connection details for the dockerized HMS + MinIO backing the Hudi data.
    String externalEnvIp = context.config.otherConfigs.get("externalEnvIp")
    String hudiHmsPort = context.config.otherConfigs.get("hudiHmsPort")
    String hudiMinioPort = context.config.otherConfigs.get("hudiMinioPort")
    String hudiMinioAccessKey = context.config.otherConfigs.get("hudiMinioAccessKey")
    String hudiMinioSecretKey = context.config.otherConfigs.get("hudiMinioSecretKey")

    sql """set materialized_view_rewrite_enable_contain_external_table=true;"""
    String mvSql = "SELECT par,count(*) as num FROM ${catalogName}.`hudi_mtmv_regression_test`.hudi_table_1 group by par;";

    sql """drop catalog if exists ${catalogName}"""
    sql """
        create catalog if not exists ${catalogName} properties (
            'type'='hms',
            'hive.metastore.uris' = 'thrift://${externalEnvIp}:${hudiHmsPort}',
            's3.endpoint' = 'http://${externalEnvIp}:${hudiMinioPort}',
            's3.access_key' = '${hudiMinioAccessKey}',
            's3.secret_key' = '${hudiMinioSecretKey}',
            's3.region' = 'us-east-1',
            'use_path_style' = 'true'
        );
    """

    sql """analyze table ${catalogName}.`hudi_mtmv_regression_test`.hudi_table_1 with sync"""

    // Pin deterministic column statistics so the planner's cost decisions (and
    // hence the rewrite) do not depend on what 'analyze' happened to collect.
    // Previously this was 7 near-identical ALTER statements in non-interpolating
    // '''…''' strings with the catalog name hard-coded; driving them from a map
    // keeps the catalog name in one place (${catalogName}) and produces the
    // same SQL for each column.
    def columnStats = [
        'age'                    : ['ndv':'10', 'num_nulls':'0', 'min_value':'1', 'max_value':'10', 'row_count':'10'],
        '_hoodie_record_key'     : ['ndv':'10', 'num_nulls':'0', 'min_value':'20250121171615893_0_0', 'max_value':'20250121171615893_7_1', 'row_count':'10'],
        'id'                     : ['ndv':'10', 'num_nulls':'0', 'min_value':'1', 'max_value':'10', 'row_count':'10'],
        '_hoodie_file_name'      : ['ndv':'2', 'num_nulls':'0', 'min_value':'58eabd3f-1996-4cb6-83e4-56fd11cb4e7d-0_0-30-108_20250121171615893.parquet', 'max_value':'7f98e9ac-bd11-48fd-ac80-9ca6dc1ddb34-0_1-30-109_20250121171615893.parquet', 'row_count':'10'],
        '_hoodie_partition_path' : ['ndv':'2', 'num_nulls':'0', 'min_value':'par=a', 'max_value':'par=b', 'row_count':'10'],
        '_hoodie_commit_seqno'   : ['ndv':'10', 'num_nulls':'0', 'min_value':'20250121171615893_0_0', 'max_value':'20250121171615893_1_4', 'row_count':'10'],
        '_hoodie_commit_time'    : ['ndv':'1', 'num_nulls':'0', 'min_value':'20250121171615893', 'max_value':'20250121171615893', 'row_count':'10'],
    ]
    columnStats.each { column, stats ->
        String statsList = stats.collect { k, v -> "'${k}'='${v}'" }.join(', ')
        sql """
            alter table ${catalogName}.hudi_mtmv_regression_test.hudi_table_1
            modify column ${column} set stats (${statsList});
        """
    }

    sql """drop materialized view if exists ${mvName};"""

    // Deferred build: partitions are created from the Hudi table's 'par'
    // partition column but no data is loaded until an explicit REFRESH.
    sql """
        CREATE MATERIALIZED VIEW ${mvName}
        BUILD DEFERRED REFRESH AUTO ON MANUAL
        partition by(`par`)
        DISTRIBUTED BY RANDOM BUCKETS 2
        PROPERTIES ('replication_num' = '1')
        AS
        ${mvSql}
    """
    def showPartitionsResult = sql """show partitions from ${mvName}"""
    logger.info("showPartitionsResult: " + showPartitionsResult.toString())
    assertTrue(showPartitionsResult.toString().contains("p_a"))
    assertTrue(showPartitionsResult.toString().contains("p_b"))

    // refresh one partition (p_a) only
    sql """
        REFRESH MATERIALIZED VIEW ${mvName} partitions(p_a);
    """
    waitingMTMVTaskFinishedByMvName(mvName)
    sql """analyze table ${mvName} with sync"""
    order_qt_refresh_one_partition "SELECT par, num FROM ${mvName} "

    sql """alter table ${mvName} modify column par set stats ('row_count'='1');"""

    // The MV query itself must be rewritten to the (partially refreshed) MV.
    mv_rewrite_success(mvSql, mvName)
    order_qt_refresh_one_partition_rewrite "${mvSql}"

    // Querying the unrefreshed partition p_b must NOT use the MV.
    mv_not_part_in("SELECT par,count(*) as num FROM ${catalogName}.`hudi_mtmv_regression_test`.hudi_table_1 where par='b' group by par;", "${mvName}")

    // refresh the remaining stale partitions automatically
    sql """
        REFRESH MATERIALIZED VIEW ${mvName} auto
    """
    waitingMTMVTaskFinishedByMvName(mvName)
    sql """analyze table ${mvName} with sync"""
    sql """alter table ${mvName} modify column par set stats ('row_count'='2');"""
    order_qt_refresh_auto "SELECT par, num FROM ${mvName} "

    // With all partitions fresh the rewrite must succeed again.
    mv_rewrite_success(mvSql, mvName)
    order_qt_refresh_all_partition_rewrite "${mvSql}"

    sql """drop materialized view if exists ${mvName};"""
    sql """drop catalog if exists ${catalogName}"""
}

diff --git a/regression-test/suites/external_table_p0/hudi/test_hudi_runtime_filter_partition_pruning.groovy b/regression-test/suites/external_table_p0/hudi/test_hudi_runtime_filter_partition_pruning.groovy new file mode 100644 index 00000000000000..8da504f6903bdc --- /dev/null +++ b/regression-test/suites/external_table_p0/hudi/test_hudi_runtime_filter_partition_pruning.groovy @@ -0,0 +1,209 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 

// Verifies runtime-filter-driven partition pruning on Hudi tables for every
// supported partition-column type (boolean/tinyint/smallint/int/bigint/
// string/date/timestamp) plus multi-level partition tables. The same query
// set is run twice — once with enable_runtime_filter_partition_prune off and
// once on — and the qt_ golden results must match in both modes.
suite("test_hudi_runtime_filter_partition_pruning", "p0,external,hudi,external_docker,external_docker_hudi") {
    String enabled = context.config.otherConfigs.get("enableHudiTest")
    if (enabled == null || !enabled.equalsIgnoreCase("true")) {
        logger.info("disable hudi test")
        return
    }

    String catalog_name = "test_hudi_runtime_filter_partition_pruning"
    // Connection details for the dockerized HMS + MinIO backing the Hudi data.
    String externalEnvIp = context.config.otherConfigs.get("externalEnvIp")
    String hudiHmsPort = context.config.otherConfigs.get("hudiHmsPort")
    String hudiMinioPort = context.config.otherConfigs.get("hudiMinioPort")
    String hudiMinioAccessKey = context.config.otherConfigs.get("hudiMinioAccessKey")
    String hudiMinioSecretKey = context.config.otherConfigs.get("hudiMinioSecretKey")

    sql """drop catalog if exists ${catalog_name};"""
    sql """
        create catalog if not exists ${catalog_name} properties (
            'type'='hms',
            'hive.metastore.uris' = 'thrift://${externalEnvIp}:${hudiHmsPort}',
            's3.endpoint' = 'http://${externalEnvIp}:${hudiMinioPort}',
            's3.access_key' = '${hudiMinioAccessKey}',
            's3.secret_key' = '${hudiMinioSecretKey}',
            's3.region' = 'us-east-1',
            'use_path_style' = 'true'
        );
    """

    sql """ switch ${catalog_name};"""
    sql """ use regression_hudi;"""
    sql """ set enable_fallback_to_original_planner=false """

    // Each query filters the partition column with a value produced by a
    // subquery, so the partition predicate is only known at runtime — the
    // shape that runtime-filter partition pruning targets.
    def test_runtime_filter_partition_pruning = {
        // Test BOOLEAN partition
        qt_runtime_filter_partition_pruning_boolean_1 """
            select count(*) from boolean_partition_tb where part1 =
                (select part1 from boolean_partition_tb
                group by part1 having count(*) > 0
                order by part1 desc limit 1);
        """

        qt_runtime_filter_partition_pruning_boolean_2 """
            select count(*) from boolean_partition_tb where part1 in
                (select part1 from boolean_partition_tb
                group by part1 having count(*) > 0);
        """

        // Test TINYINT partition
        qt_runtime_filter_partition_pruning_tinyint_1 """
            select count(*) from tinyint_partition_tb where part1 =
                (select part1 from tinyint_partition_tb
                group by part1 having count(*) > 0
                order by part1 desc limit 1);
        """

        qt_runtime_filter_partition_pruning_tinyint_2 """
            select count(*) from tinyint_partition_tb where part1 in
                (select part1 from tinyint_partition_tb
                group by part1 having count(*) > 0
                order by part1 desc limit 2);
        """

        // Test SMALLINT partition
        qt_runtime_filter_partition_pruning_smallint_1 """
            select count(*) from smallint_partition_tb where part1 =
                (select part1 from smallint_partition_tb
                group by part1 having count(*) > 0
                order by part1 desc limit 1);
        """

        qt_runtime_filter_partition_pruning_smallint_2 """
            select count(*) from smallint_partition_tb where part1 in
                (select part1 from smallint_partition_tb
                group by part1 having count(*) > 0
                order by part1 desc limit 2);
        """

        // Test INT partition
        qt_runtime_filter_partition_pruning_int_1 """
            select count(*) from int_partition_tb where part1 =
                (select part1 from int_partition_tb
                group by part1 having count(*) > 0
                order by part1 desc limit 1);
        """

        qt_runtime_filter_partition_pruning_int_2 """
            select count(*) from int_partition_tb where part1 in
                (select part1 from int_partition_tb
                group by part1 having count(*) > 0
                order by part1 desc limit 2);
        """

        // abs() wraps the partition column, so the predicate is not directly
        // SARGable on part1 — exercises pruning with an expression filter.
        qt_runtime_filter_partition_pruning_int_3 """
            select count(*) from int_partition_tb where abs(part1) =
                (select part1 from int_partition_tb
                group by part1 having count(*) > 0
                order by part1 desc limit 1);
        """

        // Test BIGINT partition
        qt_runtime_filter_partition_pruning_bigint_1 """
            select count(*) from bigint_partition_tb where part1 =
                (select part1 from bigint_partition_tb
                group by part1 having count(*) > 0
                order by part1 desc limit 1);
        """

        qt_runtime_filter_partition_pruning_bigint_2 """
            select count(*) from bigint_partition_tb where part1 in
                (select part1 from bigint_partition_tb
                group by part1 having count(*) > 0
                order by part1 desc limit 2);
        """

        // Test STRING partition
        qt_runtime_filter_partition_pruning_string_1 """
            select count(*) from string_partition_tb where part1 =
                (select part1 from string_partition_tb
                group by part1 having count(*) > 0
                order by part1 desc limit 1);
        """

        qt_runtime_filter_partition_pruning_string_2 """
            select count(*) from string_partition_tb where part1 in
                (select part1 from string_partition_tb
                group by part1 having count(*) > 0
                order by part1 desc limit 2);
        """

        // Test DATE partition
        qt_runtime_filter_partition_pruning_date_1 """
            select count(*) from date_partition_tb where part1 =
                (select part1 from date_partition_tb
                group by part1 having count(*) > 0
                order by part1 desc limit 1);
        """

        qt_runtime_filter_partition_pruning_date_2 """
            select count(*) from date_partition_tb where part1 in
                (select part1 from date_partition_tb
                group by part1 having count(*) > 0
                order by part1 desc limit 2);
        """

        // Test TIMESTAMP partition
        qt_runtime_filter_partition_pruning_timestamp_1 """
            select count(*) from timestamp_partition_tb where part1 =
                (select part1 from timestamp_partition_tb
                group by part1 having count(*) > 0
                order by part1 desc limit 1);
        """

        qt_runtime_filter_partition_pruning_timestamp_2 """
            select count(*) from timestamp_partition_tb where part1 in
                (select part1 from timestamp_partition_tb
                group by part1 having count(*) > 0
                order by part1 desc limit 2);
        """

        // Additional complex scenarios with multiple filters
        qt_runtime_filter_partition_pruning_complex_1 """
            select count(*) from three_partition_tb t1
            where t1.part1 in (
                select t2.part1 from three_partition_tb t2
                where t2.part2 = 2024
                group by t2.part1 having count(*) > 2
            );
        """

        qt_runtime_filter_partition_pruning_complex_2 """
            select count(*) from two_partition_tb t1
            where t1.part1 = 'US' and t1.part2 in (
                select t2.part2 from two_partition_tb t2
                where t2.part1 = 'US'
                group by t2.part2 having count(*) > 1
            );
        """
    }

    try {
        // Test with runtime filter partition pruning disabled
        sql """ set enable_runtime_filter_partition_prune = false; """
        test_runtime_filter_partition_pruning()

        // Test with runtime filter partition pruning enabled
        sql """ set enable_runtime_filter_partition_prune = true; """
        test_runtime_filter_partition_pruning()

    } finally {
        // Re-enable the feature for subsequent suites sharing this session.
        // NOTE(review): 'true' is set unconditionally — confirm it matches the
        // session-variable default rather than capturing the prior value.
        sql """ set enable_runtime_filter_partition_prune = true; """
    }
}
diff --git a/regression-test/suites/external_table_p0/hudi/test_hudi_schema_change.groovy b/regression-test/suites/external_table_p0/hudi/test_hudi_schema_change.groovy
new file mode 100644
index 00000000000000..583daa6fc50a75
--- /dev/null
+++ b/regression-test/suites/external_table_p0/hudi/test_hudi_schema_change.groovy
@@ -0,0 +1,81 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

// Reads Hudi COW tables (one ORC-backed, one Parquet-backed) that underwent
// schema change, using the native (non-JNI) scanner, and compares against
// golden results. Queries touching renamed columns are kept but disabled
// until RENAME COLUMN support lands (see TODO below).
suite("test_hudi_schema_change", "p0,external,hudi,external_docker,external_docker_hudi") {
    String enabled = context.config.otherConfigs.get("enableHudiTest")
    if (enabled == null || !enabled.equalsIgnoreCase("true")) {
        logger.info("disable hudi test")
        return
    }

    String catalog_name = "test_hudi_schema_change"
    // Connection details for the dockerized HMS + MinIO backing the Hudi data.
    String externalEnvIp = context.config.otherConfigs.get("externalEnvIp")
    String hudiHmsPort = context.config.otherConfigs.get("hudiHmsPort")
    String hudiMinioPort = context.config.otherConfigs.get("hudiMinioPort")
    String hudiMinioAccessKey = context.config.otherConfigs.get("hudiMinioAccessKey")
    String hudiMinioSecretKey = context.config.otherConfigs.get("hudiMinioSecretKey")

    sql """drop catalog if exists ${catalog_name};"""
    sql """
        create catalog if not exists ${catalog_name} properties (
            'type'='hms',
            'hive.metastore.uris' = 'thrift://${externalEnvIp}:${hudiHmsPort}',
            's3.endpoint' = 'http://${externalEnvIp}:${hudiMinioPort}',
            's3.access_key' = '${hudiMinioAccessKey}',
            's3.secret_key' = '${hudiMinioSecretKey}',
            's3.region' = 'us-east-1',
            'use_path_style' = 'true'
        );
    """

    sql """ switch ${catalog_name};"""
    sql """ use regression_hudi;"""
    sql """ set enable_fallback_to_original_planner=false """
    // Force the native reader; the JNI path is covered by other suites.
    sql """set force_jni_scanner = false;"""

    def hudi_sc_tbs = ["hudi_sc_orc_cow","hudi_sc_parquet_cow"]

    // The same qt_ tag is executed once per table; the golden .out file holds
    // one result set per iteration, in list order.
    for (String hudi_sc_tb : hudi_sc_tbs) {
        qt_hudi_0 """ SELECT id, name, age, city, score FROM ${hudi_sc_tb} ORDER BY id; """
        // TODO: Uncomment these test cases after RENAME COLUMN feature is implemented in Hudi
        // The following queries use 'full_name' and 'location' columns which require RENAME COLUMN operation
        // Currently RENAME COLUMN is disabled in 07_create_schema_change_tables.sql, so these tests are commented out
        // qt_hudi_1 """ SELECT id, score, full_name, location FROM ${hudi_sc_tb} WHERE score > 90 ORDER BY id; """
        // qt_hudi_2 """ SELECT id, score, full_name, location FROM ${hudi_sc_tb} WHERE score < 90 ORDER BY id; """
        // qt_hudi_3 """ SELECT id, score, full_name, location FROM ${hudi_sc_tb} WHERE score = 90 ORDER BY id; """
        // qt_hudi_4 """ SELECT id, score, full_name, location FROM ${hudi_sc_tb} WHERE score IS NULL ORDER BY id; """
        // qt_hudi_5 """ SELECT id, score, full_name, location FROM ${hudi_sc_tb} WHERE location = 'New York' ORDER BY id; """
        // qt_hudi_6 """ SELECT id, score, full_name, location FROM ${hudi_sc_tb} WHERE location IS NULL ORDER BY id; """
        // qt_hudi_7 """ SELECT id, score, full_name, location FROM ${hudi_sc_tb} WHERE score > 85 AND location = 'San Francisco' ORDER BY id; """
        // qt_hudi_8 """ SELECT id, score, full_name, location FROM ${hudi_sc_tb} WHERE score < 100 OR location = 'Austin' ORDER BY id; """
        // qt_hudi_9 """ SELECT id, full_name FROM ${hudi_sc_tb} WHERE full_name LIKE 'A%' ORDER BY id; """
        // qt_hudi_10 """ SELECT id, score, full_name, location FROM ${hudi_sc_tb} WHERE id BETWEEN 3 AND 7 ORDER BY id; """
        // qt_hudi_11 """ SELECT id, score, full_name, location, age FROM ${hudi_sc_tb} WHERE age > 20 ORDER BY id; """
        // qt_hudi_12 """ SELECT id, score, full_name, location, age FROM ${hudi_sc_tb} WHERE age IS NULL ORDER BY id; """
        // qt_hudi_13 """ SELECT id, score, full_name, location, age FROM ${hudi_sc_tb} WHERE score > 100 AND age IS NOT NULL ORDER BY id; """
        // qt_hudi_14 """ SELECT id, score, full_name, location, age FROM ${hudi_sc_tb} WHERE location = 'cn' ORDER BY id; """
        // qt_hudi_15 """ SELECT id, score, full_name, location, age FROM ${hudi_sc_tb} WHERE full_name = 'QQ' AND age > 20 ORDER BY id; """
        // qt_hudi_16 """ SELECT id, score, full_name, location, age FROM ${hudi_sc_tb} WHERE score < 100 OR age < 25 ORDER BY id; """
        // qt_hudi_17 """ SELECT id, score, full_name, location, age FROM ${hudi_sc_tb} WHERE age BETWEEN 20 AND 30 ORDER BY id; """
        // qt_hudi_18 """ SELECT id, score, full_name, location, age FROM ${hudi_sc_tb} WHERE location IS NULL AND age IS NULL ORDER BY id; """
        // qt_hudi_19 """ SELECT id, full_name, age FROM ${hudi_sc_tb} WHERE full_name LIKE 'Q%' AND age IS NOT NULL ORDER BY id; """
        // qt_hudi_20 """ SELECT id, score, full_name, location, age FROM ${hudi_sc_tb} WHERE id > 5 AND age IS NULL ORDER BY id; """
    }

    sql """drop catalog if exists ${catalog_name};"""
}

diff --git a/regression-test/suites/external_table_p0/hudi/test_hudi_schema_evolution.groovy b/regression-test/suites/external_table_p0/hudi/test_hudi_schema_evolution.groovy
new file mode 100644
index 00000000000000..f65cda2bb07404
--- /dev/null
+++ b/regression-test/suites/external_table_p0/hudi/test_hudi_schema_evolution.groovy
@@ -0,0 +1,126 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

// Verifies reads of Hudi tables whose schema evolved (columns added, dropped,
// reordered; struct fields added/dropped), with both the JNI scanner and the
// native scanner. Rows id 1-3 were written under the old schema and rows
// id 4-6 under the new one, so both code paths must reconcile the schemas.
suite("test_hudi_schema_evolution", "p0,external,hudi,external_docker,external_docker_hudi") {
    String enabled = context.config.otherConfigs.get("enableHudiTest")
    if (enabled == null || !enabled.equalsIgnoreCase("true")) {
        logger.info("disable hudi test")
        return
    }

    String catalog_name = "test_hudi_schema_evolution"
    // Connection details for the dockerized HMS + MinIO backing the Hudi data.
    String externalEnvIp = context.config.otherConfigs.get("externalEnvIp")
    String hudiHmsPort = context.config.otherConfigs.get("hudiHmsPort")
    String hudiMinioPort = context.config.otherConfigs.get("hudiMinioPort")
    String hudiMinioAccessKey = context.config.otherConfigs.get("hudiMinioAccessKey")
    String hudiMinioSecretKey = context.config.otherConfigs.get("hudiMinioSecretKey")

    sql """drop catalog if exists ${catalog_name};"""
    sql """
        create catalog if not exists ${catalog_name} properties (
            'type'='hms',
            'hive.metastore.uris' = 'thrift://${externalEnvIp}:${hudiHmsPort}',
            's3.endpoint' = 'http://${externalEnvIp}:${hudiMinioPort}',
            's3.access_key' = '${hudiMinioAccessKey}',
            's3.secret_key' = '${hudiMinioSecretKey}',
            's3.region' = 'us-east-1',
            'use_path_style' = 'true'
        );
    """

    sql """ switch ${catalog_name};"""
    sql """ use regression_hudi;"""
    sql """ set enable_fallback_to_original_planner=false """

    // Test with JNI scanner
    sql """set force_jni_scanner = true;"""

    // Test adding_simple_columns_table: schema evolution adding columns (id, name) -> (id, name, age, city)
    qt_jni_adding_simple_columns_table_all """ select id, name, age, city from adding_simple_columns_table order by id """
    qt_jni_adding_simple_columns_table_old_data """ select id, name, age, city from adding_simple_columns_table where id in ('1', '2', '3') order by id """
    qt_jni_adding_simple_columns_table_new_data """ select id, name, age, city from adding_simple_columns_table where id in ('4', '5', '6') order by id """

    // Test deleting_simple_columns_table: schema evolution dropping columns (id, name, age, city) -> (id, name)
    qt_jni_deleting_simple_columns_table_all """ select id, name from deleting_simple_columns_table order by id """
    qt_jni_deleting_simple_columns_table_old_data """ select id, name from deleting_simple_columns_table where id in ('1', '2', '3') order by id """
    qt_jni_deleting_simple_columns_table_new_data """ select id, name from deleting_simple_columns_table where id in ('4', '5', '6') order by id """

    // Test renaming_simple_columns_table: schema evolution renaming column name -> full_name
    // Note: Hudi doesn't support RENAME COLUMN, so this test is skipped
    // qt_jni_renaming_simple_columns_table_all """ select id, full_name from renaming_simple_columns_table order by id """
    // qt_jni_renaming_simple_columns_table_old_data """ select id, full_name from renaming_simple_columns_table where id in ('1', '2', '3') order by id """
    // qt_jni_renaming_simple_columns_table_new_data """ select id, full_name from renaming_simple_columns_table where id in ('4', '5', '6') order by id """

    // Test reordering_columns_table: schema evolution reordering columns
    qt_jni_reordering_columns_table_all """ select id, name, age from reordering_columns_table order by id """

    // Test adding_complex_columns_table: schema evolution adding email field to struct
    qt_jni_adding_complex_columns_table_all """ select id, name, info from adding_complex_columns_table order by id """
    qt_jni_adding_complex_columns_table_old_struct """ select id, name, info from adding_complex_columns_table where id in ('1', '2', '3') order by id """
    qt_jni_adding_complex_columns_table_new_struct """ select id, name, info from adding_complex_columns_table where id in ('4', '5', '6') order by id """

    // Test deleting_complex_columns_table: schema evolution dropping email field from struct
    qt_jni_deleting_complex_columns_table_all """ select id, name, info from deleting_complex_columns_table order by id """
    qt_jni_deleting_complex_columns_table_old_struct """ select id, name, info from deleting_complex_columns_table where id in ('1', '2', '3') order by id """
    qt_jni_deleting_complex_columns_table_new_struct """ select id, name, info from deleting_complex_columns_table where id in ('4', '5', '6') order by id """

    // Test renaming_complex_columns_table: schema evolution renaming location -> address in struct
    // Note: Hudi doesn't support renaming struct fields, so this test is skipped
    // qt_jni_renaming_complex_columns_table_all """ select * from renaming_complex_columns_table order by id """
    // qt_jni_renaming_complex_columns_table_old_struct """ select id, name, info from renaming_complex_columns_table where id in ('1', '2', '3') order by id """
    // qt_jni_renaming_complex_columns_table_new_struct """ select id, name, info from renaming_complex_columns_table where id in ('4', '5', '6') order by id """

    // Test with native scanner — same query set as the JNI section above; the
    // two scanners must produce identical golden results.
    sql """set force_jni_scanner = false;"""

    // Test adding_simple_columns_table: schema evolution adding columns (id, name) -> (id, name, age, city)
    qt_native_adding_simple_columns_table_all """ select id, name, age, city from adding_simple_columns_table order by id """
    qt_native_adding_simple_columns_table_old_data """ select id, name, age, city from adding_simple_columns_table where id in ('1', '2', '3') order by id """
    qt_native_adding_simple_columns_table_new_data """ select id, name, age, city from adding_simple_columns_table where id in ('4', '5', '6') order by id """

    // Test deleting_simple_columns_table: schema evolution dropping columns (id, name, age, city) -> (id, name)
    qt_native_deleting_simple_columns_table_all """ select id, name from deleting_simple_columns_table order by id """
    qt_native_deleting_simple_columns_table_old_data """ select id, name from deleting_simple_columns_table where id in ('1', '2', '3') order by id """
    qt_native_deleting_simple_columns_table_new_data """ select id, name from deleting_simple_columns_table where id in ('4', '5', '6') order by id """

    // Test renaming_simple_columns_table: schema evolution renaming column name -> full_name
    // Note: Hudi doesn't support RENAME COLUMN, so this test is skipped
    // qt_native_renaming_simple_columns_table_all """ select id, full_name from renaming_simple_columns_table order by id """
    // qt_native_renaming_simple_columns_table_old_data """ select id, full_name from renaming_simple_columns_table where id in ('1', '2', '3') order by id """
    // qt_native_renaming_simple_columns_table_new_data """ select id, full_name from renaming_simple_columns_table where id in ('4', '5', '6') order by id """

    // Test reordering_columns_table: schema evolution reordering columns
    qt_native_reordering_columns_table_all """ select id, name, age from reordering_columns_table order by id """

    // Test adding_complex_columns_table: schema evolution adding email field to struct
    qt_native_adding_complex_columns_table_all """ select id, name, info from adding_complex_columns_table order by id """
    qt_native_adding_complex_columns_table_old_struct """ select id, name, info from adding_complex_columns_table where id in ('1', '2', '3') order by id """
    qt_native_adding_complex_columns_table_new_struct """ select id, name, info from adding_complex_columns_table where id in ('4', '5', '6') order by id """

    // Test deleting_complex_columns_table: schema evolution dropping email field from struct
    qt_native_deleting_complex_columns_table_all """ select id, name, info from deleting_complex_columns_table order by id """
    qt_native_deleting_complex_columns_table_old_struct """ select id, name, info from deleting_complex_columns_table where id in ('1', '2', '3') order by id """
    qt_native_deleting_complex_columns_table_new_struct """ select id, name, info from deleting_complex_columns_table where id in ('4', '5', '6') order by id """

    // Test renaming_complex_columns_table: schema evolution renaming location -> address in struct
    // Note: Hudi doesn't support renaming struct fields, so this test is skipped
    // qt_native_renaming_complex_columns_table_all """ select * from renaming_complex_columns_table order by id """
    // qt_native_renaming_complex_columns_table_old_struct """ select id, name, info from renaming_complex_columns_table where id in ('1', '2', '3') order by id """
    // qt_native_renaming_complex_columns_table_new_struct """ select id, name, info from renaming_complex_columns_table where id in ('4', '5', '6') order by id """

    sql """drop catalog if exists ${catalog_name};"""
}
diff --git a/regression-test/suites/external_table_p0/hudi/test_hudi_snapshot.groovy b/regression-test/suites/external_table_p0/hudi/test_hudi_snapshot.groovy
new file mode 100644
index 00000000000000..4cdb7995f3d456
--- /dev/null
+++ b/regression-test/suites/external_table_p0/hudi/test_hudi_snapshot.groovy
@@ -0,0 +1,109 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// Snapshot (latest-state) read tests for Hudi tables exposed through an HMS
// catalog backed by MinIO. Covers MOR/COW tables, partitioned and not, and
// runs the whole query set under both the JNI scanner and the native scanner.
suite("test_hudi_snapshot", "p0,external,hudi,external_docker,external_docker_hudi") {
    String enabled = context.config.otherConfigs.get("enableHudiTest")
    // "true".equalsIgnoreCase(null) is false, so a missing config also skips.
    if (!"true".equalsIgnoreCase(enabled)) {
        logger.info("disable hudi test")
        return
    }

    String catalog_name = "test_hudi_snapshot"
    String externalEnvIp = context.config.otherConfigs.get("externalEnvIp")
    String hudiHmsPort = context.config.otherConfigs.get("hudiHmsPort")
    String hudiMinioPort = context.config.otherConfigs.get("hudiMinioPort")
    String hudiMinioAccessKey = context.config.otherConfigs.get("hudiMinioAccessKey")
    String hudiMinioSecretKey = context.config.otherConfigs.get("hudiMinioSecretKey")

    // Recreate the catalog from scratch so stale metadata never leaks between runs.
    sql """drop catalog if exists ${catalog_name};"""
    sql """
        create catalog if not exists ${catalog_name} properties (
            'type'='hms',
            'hive.metastore.uris' = 'thrift://${externalEnvIp}:${hudiHmsPort}',
            's3.endpoint' = 'http://${externalEnvIp}:${hudiMinioPort}',
            's3.access_key' = '${hudiMinioAccessKey}',
            's3.secret_key' = '${hudiMinioSecretKey}',
            's3.region' = 'us-east-1',
            'use_path_style' = 'true'
        );
    """

    sql """ switch ${catalog_name};"""
    sql """ use regression_hudi;"""
    sql """ set enable_fallback_to_original_planner=false """

    // Query set for non-partitioned user-activity tables.
    def runSnapshotQueries = { tbl ->
        // All records, newest events first
        order_qt_q01 """SELECT user_id, event_time, action FROM ${tbl} ORDER BY event_time DESC LIMIT 10;"""

        // One user's activity history
        order_qt_q02 """SELECT user_id, event_time, action FROM ${tbl} WHERE user_id = 1 ORDER BY event_time LIMIT 5;"""

        // Events inside a fixed epoch-millis window
        order_qt_q03 """SELECT user_id, event_time, action FROM ${tbl} WHERE event_time BETWEEN 1710000000000 AND 1710000003000 ORDER BY event_time LIMIT 10;"""

        // Filter on a single action type
        order_qt_q04 """SELECT user_id, event_time, action FROM ${tbl} WHERE action = 'login' ORDER BY event_time LIMIT 5;"""

        // Per-action record counts
        order_qt_q05 """SELECT action, COUNT(*) AS action_count FROM ${tbl} GROUP BY action ORDER BY action_count DESC;"""

        // Narrow projection
        order_qt_q06 """SELECT user_id, action FROM ${tbl} ORDER BY user_id LIMIT 5;"""
    }

    // Same query set, plus coverage of the partition column (dt), for
    // partitioned tables.
    def runPartitionedSnapshotQueries = { tbl ->
        // All records, newest events first
        order_qt_q01 """SELECT user_id, event_time, action, dt FROM ${tbl} ORDER BY event_time DESC LIMIT 10;"""

        // One user's activity history
        order_qt_q02 """SELECT user_id, event_time, action, dt FROM ${tbl} WHERE user_id = 1 ORDER BY event_time LIMIT 5;"""

        // Events inside a fixed epoch-millis window
        order_qt_q03 """SELECT user_id, event_time, action, dt FROM ${tbl} WHERE event_time BETWEEN 1710000000000 AND 1710000003000 ORDER BY event_time LIMIT 10;"""

        // Filter on a single action type
        order_qt_q04 """SELECT user_id, event_time, action, dt FROM ${tbl} WHERE action = 'login' ORDER BY event_time LIMIT 5;"""

        // Per-action record counts
        order_qt_q05 """SELECT action, COUNT(*) AS action_count FROM ${tbl} GROUP BY action ORDER BY action_count DESC;"""

        // Narrow projection
        order_qt_q06 """SELECT user_id, action FROM ${tbl} ORDER BY user_id LIMIT 5;"""

        // Partition-pruning filter on dt
        order_qt_q07 """SELECT user_id, event_time, action, dt FROM ${tbl} WHERE dt = '2024-03-01' ORDER BY event_time LIMIT 5;"""

        // Partition column projection
        order_qt_q08 """SELECT user_id, dt FROM ${tbl} ORDER BY dt, user_id LIMIT 5;"""
    }

    // Exercise both scanner implementations over the same four tables, JNI first.
    [true, false].each { useJni ->
        sql """set force_jni_scanner=${useJni};"""
        runSnapshotQueries("user_activity_log_mor_non_partition")
        runPartitionedSnapshotQueries("user_activity_log_mor_partition")
        runSnapshotQueries("user_activity_log_cow_non_partition")
        runPartitionedSnapshotQueries("user_activity_log_cow_partition")
    }

    sql """drop catalog if exists ${catalog_name};"""
}
// Checks that a Hudi TIMESTAMP column is rendered consistently under several
// session time zones, with both the native reader and the JNI reader.
suite("test_hudi_timestamp", "p0,external,hudi,external_docker,external_docker_hudi") {
    String enabled = context.config.otherConfigs.get("enableHudiTest")
    // "true".equalsIgnoreCase(null) is false, so a missing config also skips.
    if (!"true".equalsIgnoreCase(enabled)) {
        logger.info("disable hudi test")
        return
    }

    String catalog_name = "test_hudi_timestamp"
    String externalEnvIp = context.config.otherConfigs.get("externalEnvIp")
    String hudiHmsPort = context.config.otherConfigs.get("hudiHmsPort")
    String hudiMinioPort = context.config.otherConfigs.get("hudiMinioPort")
    String hudiMinioAccessKey = context.config.otherConfigs.get("hudiMinioAccessKey")
    String hudiMinioSecretKey = context.config.otherConfigs.get("hudiMinioSecretKey")

    // Recreate the catalog from scratch so stale metadata never leaks between runs.
    sql """drop catalog if exists ${catalog_name};"""
    sql """
        create catalog if not exists ${catalog_name} properties (
            'type'='hms',
            'hive.metastore.uris' = 'thrift://${externalEnvIp}:${hudiHmsPort}',
            's3.endpoint' = 'http://${externalEnvIp}:${hudiMinioPort}',
            's3.access_key' = '${hudiMinioAccessKey}',
            's3.secret_key' = '${hudiMinioSecretKey}',
            's3.region' = 'us-east-1',
            'use_path_style' = 'true'
        );
    """

    sql """ switch ${catalog_name};"""
    sql """ use regression_hudi;"""
    sql """ set enable_fallback_to_original_planner=false """

    // Re-reads the same rows under three session time zones; each zone gets its
    // own result tag (qt_timestamp1..qt_timestamp3), dispatched dynamically.
    def checkTimestampRendering = {
        ["America/Los_Angeles", "Asia/Shanghai", "UTC"].eachWithIndex { tz, i ->
            sql """set time_zone = '${tz}';"""
            "qt_timestamp${i + 1}" """ select id, name, event_time from hudi_table_with_timestamp order by id; """
        }
    }

    // Native reader first (force_jni_scanner defaults off), then the JNI reader;
    // restore the default afterwards.
    checkTimestampRendering()
    sql """ set force_jni_scanner = true; """
    checkTimestampRendering()
    sql """ set force_jni_scanner = false; """


    sql """drop catalog if exists ${catalog_name};"""
}

// Fixture used by this suite (created once via Spark SQL):
//
// DROP TABLE IF EXISTS hudi_table_with_timestamp;

// -- create table
// CREATE TABLE hudi_table_with_timestamp (
//   id STRING,
//   name STRING,
//   event_time TIMESTAMP
// ) USING HUDI
// OPTIONS (
//   type = 'cow',
//   primaryKey = 'id',
//   preCombineField = 'event_time'
// );

// SET TIME ZONE 'America/Los_Angeles';

// INSERT OVERWRITE hudi_table_with_timestamp VALUES
// ('1', 'Alice', timestamp('2024-10-25 08:00:00')),
// ('2', 'Bob', timestamp('2024-10-25 09:30:00')),
// ('3', 'Charlie', timestamp('2024-10-25 11:00:00'));
// Time-travel (FOR TIME AS OF) read tests for Hudi COW/MOR tables. Commit
// instants are discovered dynamically from each table's timeline so the suite
// stays valid as the fixture data evolves.
suite("test_hudi_timetravel", "p0,external,hudi,external_docker,external_docker_hudi") {
    String enabled = context.config.otherConfigs.get("enableHudiTest")
    // "true".equalsIgnoreCase(null) is false, so a missing config also skips.
    if (!"true".equalsIgnoreCase(enabled)) {
        logger.info("disable hudi test")
        return
    }

    String catalog_name = "test_hudi_timetravel"
    String externalEnvIp = context.config.otherConfigs.get("externalEnvIp")
    String hudiHmsPort = context.config.otherConfigs.get("hudiHmsPort")
    String hudiMinioPort = context.config.otherConfigs.get("hudiMinioPort")
    String hudiMinioAccessKey = context.config.otherConfigs.get("hudiMinioAccessKey")
    String hudiMinioSecretKey = context.config.otherConfigs.get("hudiMinioSecretKey")

    // Recreate the catalog from scratch so stale metadata never leaks between runs.
    sql """drop catalog if exists ${catalog_name};"""
    sql """
        create catalog if not exists ${catalog_name} properties (
            'type'='hms',
            'hive.metastore.uris' = 'thrift://${externalEnvIp}:${hudiHmsPort}',
            's3.endpoint' = 'http://${externalEnvIp}:${hudiMinioPort}',
            's3.access_key' = '${hudiMinioAccessKey}',
            's3.secret_key' = '${hudiMinioSecretKey}',
            's3.region' = 'us-east-1',
            'use_path_style' = 'true'
        );
    """

    sql """ switch ${catalog_name};"""
    sql """ use regression_hudi;"""
    sql """ set enable_fallback_to_original_planner=false """

    // Pulls a table's commit/deltacommit instants from its Hudi timeline via
    // the hudi_meta table function, oldest first.
    def fetchCommitTimeline = { tbl ->
        def rows = sql """
            SELECT timestamp
            FROM hudi_meta("table"="${catalog_name}.regression_hudi.${tbl}", "query_type" = "timeline")
            WHERE action = 'commit' OR action = 'deltacommit'
            ORDER BY timestamp
        """
        rows.collect { it[0] }
    }

    // Runs one FOR TIME AS OF count per instant; tag names follow the
    // qt_timetravel<N> scheme, dispatched dynamically.
    def runTimetravelQueries = { tbl, instants ->
        instants.eachWithIndex { ts, idx ->
            "qt_timetravel${idx + 1}" """ select count(user_id) from ${tbl} for time as of "${ts}"; """
        }
    }

    // Resolve each table's timeline once, before either scanner phase runs.
    def tableNames = [
        "user_activity_log_cow_non_partition",
        "user_activity_log_cow_partition",
        "user_activity_log_mor_non_partition",
        "user_activity_log_mor_partition",
    ]
    def timelineByTable = tableNames.collectEntries { [(it): fetchCommitTimeline(it)] }

    // Exercise both scanner implementations over the same instants, JNI first.
    [true, false].each { useJni ->
        sql """set force_jni_scanner=${useJni};"""
        tableNames.each { tbl ->
            runTimetravelQueries(tbl, timelineByTable[tbl])
        }
    }

    sql """drop catalog if exists ${catalog_name};"""
}