/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.facebook.presto.spark;

import com.facebook.airlift.log.Logging;
import com.facebook.presto.hive.metastore.Database;
import com.facebook.presto.hive.metastore.ExtendedHiveMetastore;
import com.facebook.presto.nativeworker.PrestoNativeQueryRunnerUtils;
import com.facebook.presto.spark.execution.nativeprocess.NativeExecutionModule;
import com.facebook.presto.spark.execution.property.NativeExecutionConnectorConfig;
import com.facebook.presto.spi.security.PrincipalType;
import com.facebook.presto.testing.QueryRunner;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.inject.Module;

import java.io.IOException;
import java.io.UncheckedIOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Map;
import java.util.Optional;

import static com.facebook.airlift.log.Level.WARN;
import static com.facebook.presto.hive.HiveTestUtils.getProperty;
import static com.facebook.presto.nativeworker.NativeQueryRunnerUtils.getNativeWorkerHiveProperties;
import static com.facebook.presto.nativeworker.NativeQueryRunnerUtils.getNativeWorkerSystemProperties;
import static com.facebook.presto.nativeworker.PrestoNativeQueryRunnerUtils.getNativeQueryRunnerParameters;
import static com.facebook.presto.spark.PrestoSparkQueryRunner.METASTORE_CONTEXT;
import static java.nio.file.Files.createTempDirectory;

/**
 * The following JVM arguments are needed to run Spark native tests.
 * <p>
 * - PRESTO_SERVER
 * - This tells Spark where to find the Presto native binary used to launch the process.
 * Example: -DPRESTO_SERVER=/path/to/native/process/bin
 * <p>
 * - DATA_DIR
 * - Optional path to store the TPC-H tables used in the test. If this directory is empty, it will be
 * populated. If the tables already exist, they will be reused.
 * <p>
 * Tests can be run in Interactive Debugging Mode, which allows for an easier debugging
 * experience. Instead of launching its own native process, the test will connect to an existing
 * native process. This gives developers the flexibility to attach IDEA and debuggers to the native process.
 * Enable this mode by setting the NATIVE_PORT JVM argument.
 * <p>
 * - NATIVE_PORT
 * - The port on which your externally launched native process listens. It is used to tell Spark where to send
 * requests, so it must match the port the external process is listening on.
 * Example: -DNATIVE_PORT=7777.
 * When NATIVE_PORT is specified, the PRESTO_SERVER argument is not required and is ignored if present.
 * <p>
 * For test queries requiring a shuffle, the disk-based local shuffle is used.
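 * <p>
 * A minimal usage sketch (illustrative only; the Maven invocation, paths, and query below are assumptions, not part of this class):
 * <pre>{@code
 * // mvn test -DPRESTO_SERVER=/path/to/presto_server -DDATA_DIR=/tmp/presto_native_data
 * PrestoSparkQueryRunner queryRunner = PrestoSparkNativeQueryRunnerUtils.createHiveRunner();
 * queryRunner.execute("SELECT 1"); // any test query works here; this one is only illustrative
 * }</pre>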
 */
public class PrestoSparkNativeQueryRunnerUtils
{
    private static final int AVAILABLE_CPU_COUNT = 4;
    private static final String SPARK_SHUFFLE_MANAGER = "spark.shuffle.manager";
    private static final String FALLBACK_SPARK_SHUFFLE_MANAGER = "spark.fallback.shuffle.manager";
    private static final String DEFAULT_STORAGE_FORMAT = "DWRF";
    private static Optional<Path> dataDirectory = Optional.empty();

    private PrestoSparkNativeQueryRunnerUtils() {}

    public static Map<String, String> getNativeExecutionSessionConfigs()
    {
        ImmutableMap.Builder<String, String> builder = new ImmutableMap.Builder<String, String>()
                // Do not use default Prestissimo config files. Presto-Spark will generate the configs on-the-fly.
                .put("catalog.config-dir", "/")
                .put("task.info-update-interval", "100ms")
                .put("spark.initial-partition-count", "1")
                .put("register-test-functions", "true")
                .put("native-execution-program-arguments", "--logtostderr=1 --minloglevel=3")
                .put("spark.partition-count-auto-tune-enabled", "false");

        if (System.getProperty("NATIVE_PORT") == null) {
            builder.put("native-execution-executable-path", getNativeQueryRunnerParameters().serverBinary.toString());
        }

        try {
            builder.put("native-execution-broadcast-base-path",
                    Files.createTempDirectory("native_broadcast").toAbsolutePath().toString());
        }
        catch (IOException e) {
            throw new UncheckedIOException("Error creating temporary directory for broadcast", e);
        }

        return builder.build();
    }

    public static PrestoSparkQueryRunner createHiveRunner()
    {
        PrestoSparkQueryRunner queryRunner = createRunner("hive", new NativeExecutionModule());
        PrestoNativeQueryRunnerUtils.setupJsonFunctionNamespaceManager(queryRunner, "external_functions.json", "json");

        return queryRunner;
    }

    private static PrestoSparkQueryRunner createRunner(String defaultCatalog, NativeExecutionModule nativeExecutionModule)
    {
        // Raise the log level to reduce log spam while running tests.
        customizeLogging();
        return createRunner(
                defaultCatalog,
                Optional.of(getBaseDataPath()),
                getNativeExecutionSessionConfigs(),
                getNativeExecutionShuffleConfigs(),
                ImmutableList.of(nativeExecutionModule));
    }

    // Similar to createHiveRunner, but with a custom connector config and without the JSON function namespace manager
    public static PrestoSparkQueryRunner createTpchRunner()
    {
        return createRunner(
                "tpchstandard",
                new NativeExecutionModule(
                        Optional.of(new NativeExecutionConnectorConfig().setConnectorName("tpch"))));
    }

    public static PrestoSparkQueryRunner createRunner(String defaultCatalog, Optional<Path> baseDir, Map<String, String> additionalConfigProperties, Map<String, String> additionalSparkProperties, ImmutableList<Module> nativeModules)
    {
        ImmutableMap.Builder<String, String> configBuilder = ImmutableMap.builder();
        configBuilder.putAll(getNativeWorkerSystemProperties()).putAll(additionalConfigProperties);
        Optional<Path> dataDir = baseDir.map(path -> Paths.get(path.toString() + '/' + DEFAULT_STORAGE_FORMAT));
        PrestoSparkQueryRunner queryRunner = new PrestoSparkQueryRunner(
                defaultCatalog,
                configBuilder.build(),
                getNativeWorkerHiveProperties(DEFAULT_STORAGE_FORMAT),
                additionalSparkProperties,
                dataDir,
                nativeModules,
                AVAILABLE_CPU_COUNT);

        ExtendedHiveMetastore metastore = queryRunner.getMetastore();
        if (!metastore.getDatabase(METASTORE_CONTEXT, "tpch").isPresent()) {
            metastore.createDatabase(METASTORE_CONTEXT, createDatabaseMetastoreObject("tpch"));
        }
        return queryRunner;
    }

    public static QueryRunner createJavaQueryRunner()
            throws Exception
    {
        return PrestoNativeQueryRunnerUtils.createJavaQueryRunner(Optional.of(getBaseDataPath()), "legacy", DEFAULT_STORAGE_FORMAT, true);
    }

    public static void customizeLogging()
    {
        Logging logging = Logging.initialize();
        logging.setLevel("org.apache.spark", WARN);
        logging.setLevel("com.facebook.presto.spark", WARN);
    }

    private static Database createDatabaseMetastoreObject(String name)
    {
        return Database.builder()
                .setDatabaseName(name)
                .setOwnerName("public")
                .setOwnerType(PrincipalType.ROLE)
                .build();
    }

    private static Map<String, String> getNativeExecutionShuffleConfigs()
    {
        ImmutableMap.Builder<String, String> sparkConfigs = ImmutableMap.builder();
        sparkConfigs.put(SPARK_SHUFFLE_MANAGER, "com.facebook.presto.spark.classloader_interface.PrestoSparkNativeExecutionShuffleManager");
        sparkConfigs.put(FALLBACK_SPARK_SHUFFLE_MANAGER, "org.apache.spark.shuffle.sort.SortShuffleManager");
        return sparkConfigs.build();
    }

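    /**
     * Returns the base data directory for the tests, caching it after the first call.
     * When the DATA_DIR JVM argument is not set, a fresh temporary directory is created;
     * otherwise the data directory configured for the native query runner is used.
     */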
    public static synchronized Path getBaseDataPath()
    {
        if (dataDirectory.isPresent()) {
            return dataDirectory.get();
        }

        Optional<String> dataDirectoryStr = getProperty("DATA_DIR");
        if (!dataDirectoryStr.isPresent()) {
            try {
                dataDirectory = Optional.of(createTempDirectory("PrestoTest").toAbsolutePath());
            }
            catch (IOException e) {
                throw new RuntimeException(e);
            }
        }
        else {
            dataDirectory = Optional.of(getNativeQueryRunnerParameters().dataDirectory);
        }
        return dataDirectory.get();
    }
}